From c05e03bb5a8c1ea4440636093857b64d5e6281c9 Mon Sep 17 00:00:00 2001 From: MarkJoyMa <64180138+MarkJoyMa@users.noreply.github.com> Date: Thu, 26 Oct 2023 23:51:28 +0800 Subject: [PATCH] feat: add metrics (#3624) --- core/metric/histogram.go | 32 ++-- core/metric/histogram_test.go | 15 +- core/stores/redis/hook.go | 3 +- core/stores/redis/metrics.go | 170 +++++++++++++++++- core/stores/redis/metrics_test.go | 130 ++++++++++++++ core/stores/redis/redisclientmanager.go | 16 +- core/stores/redis/redisclustermanager.go | 16 +- core/stores/sqlx/metrics.go | 146 ++++++++++++++- core/stores/sqlx/metrics_test.go | 147 +++++++++++++++ core/stores/sqlx/stmt.go | 3 +- rest/httpc/internal/metricsinterceptor.go | 71 ++++++++ .../httpc/internal/metricsinterceptor_test.go | 35 ++++ .../prometheusinterceptor.go | 2 +- .../prometheusinterceptor.go | 2 +- 14 files changed, 759 insertions(+), 29 deletions(-) create mode 100644 core/stores/redis/metrics_test.go create mode 100644 core/stores/sqlx/metrics_test.go create mode 100644 rest/httpc/internal/metricsinterceptor.go create mode 100644 rest/httpc/internal/metricsinterceptor_test.go diff --git a/core/metric/histogram.go b/core/metric/histogram.go index 03d8903c..a40c5365 100644 --- a/core/metric/histogram.go +++ b/core/metric/histogram.go @@ -8,18 +8,21 @@ import ( type ( // A HistogramVecOpts is a histogram vector options. HistogramVecOpts struct { - Namespace string - Subsystem string - Name string - Help string - Labels []string - Buckets []float64 + Namespace string + Subsystem string + Name string + Help string + Labels []string + Buckets []float64 + ConstLabels map[string]string } // A HistogramVec interface represents a histogram vector. HistogramVec interface { // Observe adds observation v to labels. Observe(v int64, labels ...string) + // ObserveFloat allow to observe float64 values. + ObserveFloat(v float64, labels ...string) close() bool } @@ -35,11 +38,12 @@ func NewHistogramVec(cfg *HistogramVecOpts) HistogramVec { } vec := prom.NewHistogramVec(prom.HistogramOpts{ - Namespace: cfg.Namespace, - Subsystem: cfg.Subsystem, - Name: cfg.Name, - Help: cfg.Help, - Buckets: cfg.Buckets, + Namespace: cfg.Namespace, + Subsystem: cfg.Subsystem, + Name: cfg.Name, + Help: cfg.Help, + Buckets: cfg.Buckets, + ConstLabels: cfg.ConstLabels, }, cfg.Labels) prom.MustRegister(vec) hv := &promHistogramVec{ @@ -58,6 +62,12 @@ func (hv *promHistogramVec) Observe(v int64, labels ...string) { }) } +func (hv *promHistogramVec) ObserveFloat(v float64, labels ...string) { + update(func() { + hv.histogram.WithLabelValues(labels...).Observe(v) + }) +} + func (hv *promHistogramVec) close() bool { return prom.Unregister(hv.histogram) } diff --git a/core/metric/histogram_test.go b/core/metric/histogram_test.go index 4874617b..b4ba7275 100644 --- a/core/metric/histogram_test.go +++ b/core/metric/histogram_test.go @@ -14,7 +14,7 @@ func TestNewHistogramVec(t *testing.T) { Help: "rpc server requests duration(ms).", Buckets: []float64{1, 2, 3}, }) - defer histogramVec.close() + defer histogramVec.(*promHistogramVec).close() histogramVecNil := NewHistogramVec(nil) assert.NotNil(t, histogramVec) assert.Nil(t, histogramVecNil) @@ -28,9 +28,10 @@ func TestHistogramObserve(t *testing.T) { Buckets: []float64{1, 2, 3}, Labels: []string{"method"}, }) - defer histogramVec.close() + defer histogramVec.(*promHistogramVec).close() hv, _ := histogramVec.(*promHistogramVec) hv.Observe(2, "/Users") + hv.ObserveFloat(1.1, "/Users") metadata := ` # HELP counts rpc server requests duration(ms). 
@@ -38,11 +39,11 @@ func TestHistogramObserve(t *testing.T) { ` val := ` counts_bucket{method="/Users",le="1"} 0 - counts_bucket{method="/Users",le="2"} 1 - counts_bucket{method="/Users",le="3"} 1 - counts_bucket{method="/Users",le="+Inf"} 1 - counts_sum{method="/Users"} 2 - counts_count{method="/Users"} 1 + counts_bucket{method="/Users",le="2"} 2 + counts_bucket{method="/Users",le="3"} 2 + counts_bucket{method="/Users",le="+Inf"} 2 + counts_sum{method="/Users"} 3.1 + counts_count{method="/Users"} 2 ` err := testutil.CollectAndCompare(hv.histogram, strings.NewReader(metadata+val)) diff --git a/core/stores/redis/hook.go b/core/stores/redis/hook.go index b7a0a332..46dfad1e 100644 --- a/core/stores/redis/hook.go +++ b/core/stores/redis/hook.go @@ -54,9 +54,10 @@ func (h hook) AfterProcess(ctx context.Context, cmd red.Cmder) error { duration := timex.Since(start) if duration > slowThreshold.Load() { logDuration(ctx, []red.Cmder{cmd}, duration) + metricSlowCount.Inc(cmd.Name()) } - metricReqDur.Observe(duration.Milliseconds(), cmd.Name()) + metricReqDur.ObserveFloat(float64(duration)/float64(time.Millisecond), cmd.Name()) if msg := formatError(err); len(msg) > 0 { metricReqErr.Inc(cmd.Name(), msg) } diff --git a/core/stores/redis/metrics.go b/core/stores/redis/metrics.go index 2cb381f4..955091e2 100644 --- a/core/stores/redis/metrics.go +++ b/core/stores/redis/metrics.go @@ -1,6 +1,12 @@ package redis -import "github.com/zeromicro/go-zero/core/metric" +import ( + "sync" + + red "github.com/go-redis/redis/v8" + "github.com/prometheus/client_golang/prometheus" + "github.com/zeromicro/go-zero/core/metric" +) const namespace = "redis_client" @@ -11,7 +17,7 @@ var ( Name: "duration_ms", Help: "redis client requests duration(ms).", Labels: []string{"command"}, - Buckets: []float64{5, 10, 25, 50, 100, 250, 500, 1000, 2500}, + Buckets: []float64{0.25, 0.5, 1, 1.5, 2, 3, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 15000}, }) metricReqErr = metric.NewCounterVec(&metric.CounterVecOpts{ Namespace: namespace, @@ -20,4 +26,164 @@ var ( Help: "redis client requests error count.", Labels: []string{"command", "error"}, }) + metricSlowCount = metric.NewCounterVec(&metric.CounterVecOpts{ + Namespace: namespace, + Subsystem: "requests", + Name: "slow_total", + Help: "redis client requests slow count.", + Labels: []string{"command"}, + }) + + connLabels = []string{"key", "client_type"} + + connCollector = newCollector() + + _ prometheus.Collector = (*collector)(nil) ) + +type ( + statGetter struct { + clientType string + key string + poolSize int + poolStats func() *red.PoolStats + } + + // collector collects statistics from a redis client. + // It implements the prometheus.Collector interface. 
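+	// Pool statistics are reported per registered client and labeled by key and client_type.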
+ collector struct { + hitDesc *prometheus.Desc + missDesc *prometheus.Desc + timeoutDesc *prometheus.Desc + totalDesc *prometheus.Desc + idleDesc *prometheus.Desc + staleDesc *prometheus.Desc + maxDesc *prometheus.Desc + + clients []*statGetter + lock sync.Mutex + } +) + +func newCollector() *collector { + c := &collector{ + hitDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "pool_hit_total"), + "Number of times a connection was found in the pool", + connLabels, nil, + ), + missDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "pool_miss_total"), + "Number of times a connection was not found in the pool", + connLabels, nil, + ), + timeoutDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "pool_timeout_total"), + "Number of times a timeout occurred when looking for a connection in the pool", + connLabels, nil, + ), + totalDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "pool_conn_total_current"), + "Current number of connections in the pool", + connLabels, nil, + ), + idleDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "pool_conn_idle_current"), + "Current number of idle connections in the pool", + connLabels, nil, + ), + staleDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "pool_conn_stale_total"), + "Number of times a connection was removed from the pool because it was stale", + connLabels, nil, + ), + maxDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "pool_conn_max"), + "Max number of connections in the pool", + connLabels, nil, + ), + } + + prometheus.MustRegister(c) + + return c +} + +// Describe implements the prometheus.Collector interface. +func (s *collector) Describe(descs chan<- *prometheus.Desc) { + descs <- s.hitDesc + descs <- s.missDesc + descs <- s.timeoutDesc + descs <- s.totalDesc + descs <- s.idleDesc + descs <- s.staleDesc + descs <- s.maxDesc +} + +// Collect implements the prometheus.Collector interface. 
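+// It snapshots each registered client's pool stats under the lock and emits them as constant metrics.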
+func (s *collector) Collect(metrics chan<- prometheus.Metric) { + s.lock.Lock() + defer s.lock.Unlock() + + for _, client := range s.clients { + key, clientType := client.key, client.clientType + stats := client.poolStats() + + metrics <- prometheus.MustNewConstMetric( + s.hitDesc, + prometheus.CounterValue, + float64(stats.Hits), + key, + clientType, + ) + metrics <- prometheus.MustNewConstMetric( + s.missDesc, + prometheus.CounterValue, + float64(stats.Misses), + key, + clientType, + ) + metrics <- prometheus.MustNewConstMetric( + s.timeoutDesc, + prometheus.CounterValue, + float64(stats.Timeouts), + key, + clientType, + ) + metrics <- prometheus.MustNewConstMetric( + s.totalDesc, + prometheus.GaugeValue, + float64(stats.TotalConns), + key, + clientType, + ) + metrics <- prometheus.MustNewConstMetric( + s.idleDesc, + prometheus.GaugeValue, + float64(stats.IdleConns), + key, + clientType, + ) + metrics <- prometheus.MustNewConstMetric( + s.staleDesc, + prometheus.CounterValue, + float64(stats.StaleConns), + key, + clientType, + ) + metrics <- prometheus.MustNewConstMetric( + s.maxDesc, + prometheus.CounterValue, + float64(client.poolSize), + key, + clientType, + ) + } +} + +func (s *collector) registerClient(client *statGetter) { + s.lock.Lock() + defer s.lock.Unlock() + + s.clients = append(s.clients, client) +} diff --git a/core/stores/redis/metrics_test.go b/core/stores/redis/metrics_test.go new file mode 100644 index 00000000..29c5f771 --- /dev/null +++ b/core/stores/redis/metrics_test.go @@ -0,0 +1,130 @@ +package redis + +import ( + "io" + "net/http" + "strings" + "testing" + "time" + + red "github.com/go-redis/redis/v8" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/zeromicro/go-zero/core/conf" + "github.com/zeromicro/go-zero/internal/devserver" +) + +func TestRedisMetric(t *testing.T) { + cfg := devserver.Config{} + _ = conf.FillDefault(&cfg) + server := devserver.NewServer(cfg) + server.StartAsync() + time.Sleep(time.Second) + + metricReqDur.Observe(8, "test-cmd") + metricReqErr.Inc("test-cmd", "internal-error") + metricSlowCount.Inc("test-cmd") + + url := "http://127.0.0.1:6060/metrics" + resp, err := http.Get(url) + assert.Nil(t, err) + defer resp.Body.Close() + s, err := io.ReadAll(resp.Body) + assert.Nil(t, err) + content := string(s) + assert.Contains(t, content, "redis_client_requests_duration_ms_sum{command=\"test-cmd\"} 8\n") + assert.Contains(t, content, "redis_client_requests_duration_ms_count{command=\"test-cmd\"} 1\n") + assert.Contains(t, content, "redis_client_requests_error_total{command=\"test-cmd\",error=\"internal-error\"} 1\n") + assert.Contains(t, content, "redis_client_requests_slow_total{command=\"test-cmd\"} 1\n") +} + +func Test_newCollector(t *testing.T) { + prometheus.Unregister(connCollector) + c := newCollector() + c.registerClient(&statGetter{ + clientType: "node", + key: "test1", + poolSize: 10, + poolStats: func() *red.PoolStats { + return &red.PoolStats{ + Hits: 10000, + Misses: 10, + Timeouts: 5, + TotalConns: 100, + IdleConns: 20, + StaleConns: 1, + } + }, + }) + c.registerClient(&statGetter{ + clientType: "node", + key: "test2", + poolSize: 11, + poolStats: func() *red.PoolStats { + return &red.PoolStats{ + Hits: 10001, + Misses: 11, + Timeouts: 6, + TotalConns: 101, + IdleConns: 21, + StaleConns: 2, + } + }, + }) + c.registerClient(&statGetter{ + clientType: "cluster", + key: "test3", + poolSize: 5, + poolStats: func() 
*red.PoolStats { + return &red.PoolStats{ + Hits: 20000, + Misses: 20, + Timeouts: 10, + TotalConns: 200, + IdleConns: 40, + StaleConns: 2, + } + }, + }) + val := ` + # HELP redis_client_pool_conn_idle_current Current number of idle connections in the pool + # TYPE redis_client_pool_conn_idle_current gauge + redis_client_pool_conn_idle_current{client_type="cluster",key="test3"} 40 + redis_client_pool_conn_idle_current{client_type="node",key="test1"} 20 + redis_client_pool_conn_idle_current{client_type="node",key="test2"} 21 + # HELP redis_client_pool_conn_max Max number of connections in the pool + # TYPE redis_client_pool_conn_max counter + redis_client_pool_conn_max{client_type="cluster",key="test3"} 5 + redis_client_pool_conn_max{client_type="node",key="test1"} 10 + redis_client_pool_conn_max{client_type="node",key="test2"} 11 + # HELP redis_client_pool_conn_stale_total Number of times a connection was removed from the pool because it was stale + # TYPE redis_client_pool_conn_stale_total counter + redis_client_pool_conn_stale_total{client_type="cluster",key="test3"} 2 + redis_client_pool_conn_stale_total{client_type="node",key="test1"} 1 + redis_client_pool_conn_stale_total{client_type="node",key="test2"} 2 + # HELP redis_client_pool_conn_total_current Current number of connections in the pool + # TYPE redis_client_pool_conn_total_current gauge + redis_client_pool_conn_total_current{client_type="cluster",key="test3"} 200 + redis_client_pool_conn_total_current{client_type="node",key="test1"} 100 + redis_client_pool_conn_total_current{client_type="node",key="test2"} 101 + # HELP redis_client_pool_hit_total Number of times a connection was found in the pool + # TYPE redis_client_pool_hit_total counter + redis_client_pool_hit_total{client_type="cluster",key="test3"} 20000 + redis_client_pool_hit_total{client_type="node",key="test1"} 10000 + redis_client_pool_hit_total{client_type="node",key="test2"} 10001 + # HELP redis_client_pool_miss_total Number of times a connection was not found in the pool + # TYPE redis_client_pool_miss_total counter + redis_client_pool_miss_total{client_type="cluster",key="test3"} 20 + redis_client_pool_miss_total{client_type="node",key="test1"} 10 + redis_client_pool_miss_total{client_type="node",key="test2"} 11 + # HELP redis_client_pool_timeout_total Number of times a timeout occurred when looking for a connection in the pool + # TYPE redis_client_pool_timeout_total counter + redis_client_pool_timeout_total{client_type="cluster",key="test3"} 10 + redis_client_pool_timeout_total{client_type="node",key="test1"} 5 + redis_client_pool_timeout_total{client_type="node",key="test2"} 6 +` + + err := testutil.CollectAndCompare(c, strings.NewReader(val)) + assert.NoError(t, err) +} diff --git a/core/stores/redis/redisclientmanager.go b/core/stores/redis/redisclientmanager.go index 8311e8c0..8b1b8ed5 100644 --- a/core/stores/redis/redisclientmanager.go +++ b/core/stores/redis/redisclientmanager.go @@ -3,6 +3,7 @@ package redis import ( "crypto/tls" "io" + "runtime" red "github.com/go-redis/redis/v8" "github.com/zeromicro/go-zero/core/syncx" @@ -14,7 +15,11 @@ const ( idleConns = 8 ) -var clientManager = syncx.NewResourceManager() +var ( + clientManager = syncx.NewResourceManager() + // nodePoolSize is default pool size for node type of redis. 
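+	// It is passed to the pool collector explicitly because red.PoolStats does not expose the configured pool size.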
+ nodePoolSize = 10 * runtime.GOMAXPROCS(0) +) func getClient(r *Redis) (*red.Client, error) { val, err := clientManager.GetResource(r.Addr, func() (io.Closer, error) { @@ -37,6 +42,15 @@ func getClient(r *Redis) (*red.Client, error) { store.AddHook(hook) } + connCollector.registerClient(&statGetter{ + clientType: NodeType, + key: r.Addr, + poolSize: nodePoolSize, + poolStats: func() *red.PoolStats { + return store.PoolStats() + }, + }) + return store, nil }) if err != nil { diff --git a/core/stores/redis/redisclustermanager.go b/core/stores/redis/redisclustermanager.go index d4a489e6..105498e8 100644 --- a/core/stores/redis/redisclustermanager.go +++ b/core/stores/redis/redisclustermanager.go @@ -3,6 +3,7 @@ package redis import ( "crypto/tls" "io" + "runtime" "strings" red "github.com/go-redis/redis/v8" @@ -11,7 +12,11 @@ import ( const addrSep = "," -var clusterManager = syncx.NewResourceManager() +var ( + clusterManager = syncx.NewResourceManager() + // clusterPoolSize is default pool size for cluster type of redis. + clusterPoolSize = 5 * runtime.GOMAXPROCS(0) +) func getCluster(r *Redis) (*red.ClusterClient, error) { val, err := clusterManager.GetResource(r.Addr, func() (io.Closer, error) { @@ -33,6 +38,15 @@ func getCluster(r *Redis) (*red.ClusterClient, error) { store.AddHook(hook) } + connCollector.registerClient(&statGetter{ + clientType: ClusterType, + key: r.Addr, + poolSize: clusterPoolSize, + poolStats: func() *red.PoolStats { + return store.PoolStats() + }, + }) + return store, nil }) if err != nil { diff --git a/core/stores/sqlx/metrics.go b/core/stores/sqlx/metrics.go index 823da2b4..842439fe 100644 --- a/core/stores/sqlx/metrics.go +++ b/core/stores/sqlx/metrics.go @@ -1,8 +1,14 @@ package sqlx -import "github.com/zeromicro/go-zero/core/metric" +import ( + "database/sql" + "sync" -const namespace = "sql_client" + "github.com/prometheus/client_golang/prometheus" + "github.com/zeromicro/go-zero/core/metric" +) + +const namespace = "mysql_client" var ( metricReqDur = metric.NewHistogramVec(&metric.HistogramVecOpts{ @@ -11,7 +17,7 @@ var ( Name: "duration_ms", Help: "mysql client requests duration(ms).", Labels: []string{"command"}, - Buckets: []float64{5, 10, 25, 50, 100, 250, 500, 1000, 2500}, + Buckets: []float64{0.25, 0.5, 1, 1.5, 2, 3, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 15000}, }) metricReqErr = metric.NewCounterVec(&metric.CounterVecOpts{ Namespace: namespace, @@ -20,4 +26,138 @@ var ( Help: "mysql client requests error count.", Labels: []string{"command", "error"}, }) + metricSlowCount = metric.NewCounterVec(&metric.CounterVecOpts{ + Namespace: namespace, + Subsystem: "requests", + Name: "slow_total", + Help: "mysql client requests slow count.", + Labels: []string{"command"}, + }) + + connLabels = []string{"db_name", "hash"} + + connCollector = newCollector() + + _ prometheus.Collector = (*collector)(nil) ) + +type ( + statGetter struct { + dbName string + hash string + poolStats func() sql.DBStats + } + + // collector collects statistics from a redis client. + // It implements the prometheus.Collector interface. 
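+	// The statistics are read from database/sql connection pools (sql.DBStats) and labeled by db_name and hash.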
+ collector struct { + maxOpenConnections *prometheus.Desc + + openConnections *prometheus.Desc + inUseConnections *prometheus.Desc + idleConnections *prometheus.Desc + + waitCount *prometheus.Desc + waitDuration *prometheus.Desc + maxIdleClosed *prometheus.Desc + maxIdleTimeClosed *prometheus.Desc + maxLifetimeClosed *prometheus.Desc + + clients []*statGetter + lock sync.Mutex + } +) + +func newCollector() *collector { + c := &collector{ + maxOpenConnections: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "max_open_connections"), + "Maximum number of open connections to the database.", + connLabels, nil, + ), + openConnections: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "open_connections"), + "The number of established connections both in use and idle.", + connLabels, nil, + ), + inUseConnections: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "in_use_connections"), + "The number of connections currently in use.", + connLabels, nil, + ), + idleConnections: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "idle_connections"), + "The number of idle connections.", + connLabels, nil, + ), + waitCount: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "wait_count_total"), + "The total number of connections waited for.", + connLabels, nil, + ), + waitDuration: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "wait_duration_seconds_total"), + "The total time blocked waiting for a new connection.", + connLabels, nil, + ), + maxIdleClosed: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "max_idle_closed_total"), + "The total number of connections closed due to SetMaxIdleConns.", + connLabels, nil, + ), + maxIdleTimeClosed: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "max_idle_time_closed_total"), + "The total number of connections closed due to SetConnMaxIdleTime.", + connLabels, nil, + ), + maxLifetimeClosed: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "max_lifetime_closed_total"), + "The total number of connections closed due to SetConnMaxLifetime.", + connLabels, nil, + ), + } + + prometheus.MustRegister(c) + + return c +} + +// Describe implements the prometheus.Collector interface. +func (c *collector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.maxOpenConnections + ch <- c.openConnections + ch <- c.inUseConnections + ch <- c.idleConnections + ch <- c.waitCount + ch <- c.waitDuration + ch <- c.maxIdleClosed + ch <- c.maxLifetimeClosed + ch <- c.maxIdleTimeClosed +} + +// Collect implements the prometheus.Collector interface. 
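+// It snapshots sql.DBStats for each registered client under the lock and emits the values as constant metrics.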
+func (c *collector) Collect(ch chan<- prometheus.Metric) { + c.lock.Lock() + defer c.lock.Unlock() + + for _, client := range c.clients { + dbName, hash := client.dbName, client.hash + stats := client.poolStats() + ch <- prometheus.MustNewConstMetric(c.maxOpenConnections, prometheus.GaugeValue, float64(stats.MaxOpenConnections), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.openConnections, prometheus.GaugeValue, float64(stats.OpenConnections), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.inUseConnections, prometheus.GaugeValue, float64(stats.InUse), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.idleConnections, prometheus.GaugeValue, float64(stats.Idle), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.waitCount, prometheus.CounterValue, float64(stats.WaitCount), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.waitDuration, prometheus.CounterValue, stats.WaitDuration.Seconds(), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.maxIdleClosed, prometheus.CounterValue, float64(stats.MaxIdleClosed), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.maxLifetimeClosed, prometheus.CounterValue, float64(stats.MaxLifetimeClosed), dbName, hash) + ch <- prometheus.MustNewConstMetric(c.maxIdleTimeClosed, prometheus.CounterValue, float64(stats.MaxIdleTimeClosed), dbName, hash) + } +} + +func (c *collector) registerClient(client *statGetter) { + c.lock.Lock() + defer c.lock.Unlock() + + c.clients = append(c.clients, client) +} diff --git a/core/stores/sqlx/metrics_test.go b/core/stores/sqlx/metrics_test.go new file mode 100644 index 00000000..192d1f32 --- /dev/null +++ b/core/stores/sqlx/metrics_test.go @@ -0,0 +1,147 @@ +package sqlx + +import ( + "database/sql" + "io" + "net/http" + "strings" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/zeromicro/go-zero/core/conf" + "github.com/zeromicro/go-zero/internal/devserver" +) + +func TestSqlxMetric(t *testing.T) { + cfg := devserver.Config{} + _ = conf.FillDefault(&cfg) + cfg.Port = 6480 + server := devserver.NewServer(cfg) + server.StartAsync() + time.Sleep(time.Second) + + metricReqDur.Observe(8, "test-cmd") + metricReqErr.Inc("test-cmd", "internal-error") + metricSlowCount.Inc("test-cmd") + + url := "http://127.0.0.1:6480/metrics" + resp, err := http.Get(url) + assert.Nil(t, err) + defer resp.Body.Close() + s, err := io.ReadAll(resp.Body) + assert.Nil(t, err) + content := string(s) + assert.Contains(t, content, "mysql_client_requests_duration_ms_sum{command=\"test-cmd\"} 8\n") + assert.Contains(t, content, "mysql_client_requests_duration_ms_count{command=\"test-cmd\"} 1\n") + assert.Contains(t, content, "mysql_client_requests_error_total{command=\"test-cmd\",error=\"internal-error\"} 1\n") + assert.Contains(t, content, "mysql_client_requests_slow_total{command=\"test-cmd\"} 1\n") +} + +func TestMetricCollector(t *testing.T) { + prometheus.Unregister(connCollector) + c := newCollector() + c.registerClient(&statGetter{ + dbName: "db-1", + hash: "hash-1", + poolStats: func() sql.DBStats { + return sql.DBStats{ + MaxOpenConnections: 1, + OpenConnections: 2, + InUse: 3, + Idle: 4, + WaitCount: 5, + WaitDuration: 6 * time.Second, + MaxIdleClosed: 7, + MaxIdleTimeClosed: 8, + MaxLifetimeClosed: 9, + } + }, + }) + c.registerClient(&statGetter{ + dbName: "db-1", + hash: "hash-2", + poolStats: func() sql.DBStats { + return sql.DBStats{ + MaxOpenConnections: 10, + OpenConnections: 
20, + InUse: 30, + Idle: 40, + WaitCount: 50, + WaitDuration: 60 * time.Second, + MaxIdleClosed: 70, + MaxIdleTimeClosed: 80, + MaxLifetimeClosed: 90, + } + }, + }) + c.registerClient(&statGetter{ + dbName: "db-2", + hash: "hash-2", + poolStats: func() sql.DBStats { + return sql.DBStats{ + MaxOpenConnections: 100, + OpenConnections: 200, + InUse: 300, + Idle: 400, + WaitCount: 500, + WaitDuration: 600 * time.Second, + MaxIdleClosed: 700, + MaxIdleTimeClosed: 800, + MaxLifetimeClosed: 900, + } + }, + }) + val := ` + # HELP mysql_client_idle_connections The number of idle connections. + # TYPE mysql_client_idle_connections gauge + mysql_client_idle_connections{db_name="db-1",hash="hash-1"} 4 + mysql_client_idle_connections{db_name="db-1",hash="hash-2"} 40 + mysql_client_idle_connections{db_name="db-2",hash="hash-2"} 400 + # HELP mysql_client_in_use_connections The number of connections currently in use. + # TYPE mysql_client_in_use_connections gauge + mysql_client_in_use_connections{db_name="db-1",hash="hash-1"} 3 + mysql_client_in_use_connections{db_name="db-1",hash="hash-2"} 30 + mysql_client_in_use_connections{db_name="db-2",hash="hash-2"} 300 + # HELP mysql_client_max_idle_closed_total The total number of connections closed due to SetMaxIdleConns. + # TYPE mysql_client_max_idle_closed_total counter + mysql_client_max_idle_closed_total{db_name="db-1",hash="hash-1"} 7 + mysql_client_max_idle_closed_total{db_name="db-1",hash="hash-2"} 70 + mysql_client_max_idle_closed_total{db_name="db-2",hash="hash-2"} 700 + # HELP mysql_client_max_idle_time_closed_total The total number of connections closed due to SetConnMaxIdleTime. + # TYPE mysql_client_max_idle_time_closed_total counter + mysql_client_max_idle_time_closed_total{db_name="db-1",hash="hash-1"} 8 + mysql_client_max_idle_time_closed_total{db_name="db-1",hash="hash-2"} 80 + mysql_client_max_idle_time_closed_total{db_name="db-2",hash="hash-2"} 800 + # HELP mysql_client_max_lifetime_closed_total The total number of connections closed due to SetConnMaxLifetime. + # TYPE mysql_client_max_lifetime_closed_total counter + mysql_client_max_lifetime_closed_total{db_name="db-1",hash="hash-1"} 9 + mysql_client_max_lifetime_closed_total{db_name="db-1",hash="hash-2"} 90 + mysql_client_max_lifetime_closed_total{db_name="db-2",hash="hash-2"} 900 + # HELP mysql_client_max_open_connections Maximum number of open connections to the database. + # TYPE mysql_client_max_open_connections gauge + mysql_client_max_open_connections{db_name="db-1",hash="hash-1"} 1 + mysql_client_max_open_connections{db_name="db-1",hash="hash-2"} 10 + mysql_client_max_open_connections{db_name="db-2",hash="hash-2"} 100 + # HELP mysql_client_open_connections The number of established connections both in use and idle. + # TYPE mysql_client_open_connections gauge + mysql_client_open_connections{db_name="db-1",hash="hash-1"} 2 + mysql_client_open_connections{db_name="db-1",hash="hash-2"} 20 + mysql_client_open_connections{db_name="db-2",hash="hash-2"} 200 + # HELP mysql_client_wait_count_total The total number of connections waited for. + # TYPE mysql_client_wait_count_total counter + mysql_client_wait_count_total{db_name="db-1",hash="hash-1"} 5 + mysql_client_wait_count_total{db_name="db-1",hash="hash-2"} 50 + mysql_client_wait_count_total{db_name="db-2",hash="hash-2"} 500 + # HELP mysql_client_wait_duration_seconds_total The total time blocked waiting for a new connection. 
+ # TYPE mysql_client_wait_duration_seconds_total counter + mysql_client_wait_duration_seconds_total{db_name="db-1",hash="hash-1"} 6 + mysql_client_wait_duration_seconds_total{db_name="db-1",hash="hash-2"} 60 + mysql_client_wait_duration_seconds_total{db_name="db-2",hash="hash-2"} 600 +` + + err := testutil.CollectAndCompare(c, strings.NewReader(val)) + assert.NoError(t, err) +} diff --git a/core/stores/sqlx/stmt.go b/core/stores/sqlx/stmt.go index cdaece6f..ab688abb 100644 --- a/core/stores/sqlx/stmt.go +++ b/core/stores/sqlx/stmt.go @@ -128,6 +128,7 @@ func (e *realSqlGuard) finish(ctx context.Context, err error) { duration := timex.Since(e.startTime) if duration > slowThreshold.Load() { logx.WithContext(ctx).WithDuration(duration).Slowf("[SQL] %s: slowcall - %s", e.command, e.stmt) + metricSlowCount.Inc(e.command) } else if logSql.True() { logx.WithContext(ctx).WithDuration(duration).Infof("sql %s: %s", e.command, e.stmt) } @@ -136,7 +137,7 @@ func (e *realSqlGuard) finish(ctx context.Context, err error) { logSqlError(ctx, e.stmt, err) } - metricReqDur.Observe(duration.Milliseconds(), e.command) + metricReqDur.ObserveFloat(float64(duration)/float64(time.Millisecond), e.command) } func (e *realSqlGuard) start(q string, args ...any) error { diff --git a/rest/httpc/internal/metricsinterceptor.go b/rest/httpc/internal/metricsinterceptor.go new file mode 100644 index 00000000..38cbb84c --- /dev/null +++ b/rest/httpc/internal/metricsinterceptor.go @@ -0,0 +1,71 @@ +package internal + +import ( + "net/http" + "net/url" + "strconv" + "time" + + "github.com/zeromicro/go-zero/core/metric" + "github.com/zeromicro/go-zero/core/timex" +) + +const clientNamespace = "httpc_client" + +var ( + MetricClientReqDur = metric.NewHistogramVec(&metric.HistogramVecOpts{ + Namespace: clientNamespace, + Subsystem: "requests", + Name: "duration_ms", + Help: "http client requests duration(ms).", + Labels: []string{"name", "method", "url"}, + Buckets: []float64{0.25, 0.5, 1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 15000}, + }) + + MetricClientReqCodeTotal = metric.NewCounterVec(&metric.CounterVecOpts{ + Namespace: clientNamespace, + Subsystem: "requests", + Name: "code_total", + Help: "http client requests code count.", + Labels: []string{"name", "method", "url", "code"}, + }) +) + +type ( + MetricsURLRewriter func(u url.URL) string +) + +func MetricsInterceptor(name string, pr MetricsURLRewriter) Interceptor { + return func(r *http.Request) (*http.Request, ResponseHandler) { + startTime := timex.Now() + return r, func(resp *http.Response, err error) { + u := cleanURL(*r.URL) + method := r.Method + var ( + code int + path string + ) + // error or resp is nil, set code=500 + if err != nil || resp == nil { + code = http.StatusInternalServerError + } else { + code = resp.StatusCode + } + if pr != nil { + path = pr(u) + } else { + path = u.String() + } + + MetricClientReqDur.ObserveFloat(float64(timex.Since(startTime))/float64(time.Millisecond), name, method, path) + MetricClientReqCodeTotal.Inc(name, method, path, strconv.Itoa(code)) + } + } +} + +func cleanURL(r url.URL) url.URL { + r.RawQuery = "" + r.RawFragment = "" + r.User = nil + return r +} diff --git a/rest/httpc/internal/metricsinterceptor_test.go b/rest/httpc/internal/metricsinterceptor_test.go new file mode 100644 index 00000000..413619f9 --- /dev/null +++ b/rest/httpc/internal/metricsinterceptor_test.go @@ -0,0 +1,35 @@ +package internal + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" + + 
"github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + "github.com/zeromicro/go-zero/core/logx" +) + +func TestMetricsInterceptor(t *testing.T) { + c := gomock.NewController(t) + defer c.Finish() + + logx.Disable() + + svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(100 * time.Millisecond) + w.WriteHeader(http.StatusInternalServerError) + })) + defer svr.Close() + + req, err := http.NewRequest(http.MethodGet, svr.URL, nil) + assert.NotNil(t, req) + assert.Nil(t, err) + interceptor := MetricsInterceptor("test", nil) + req, handler := interceptor(req) + resp, err := http.DefaultClient.Do(req) + assert.NotNil(t, resp) + assert.Nil(t, err) + handler(resp, err) +} diff --git a/zrpc/internal/clientinterceptors/prometheusinterceptor.go b/zrpc/internal/clientinterceptors/prometheusinterceptor.go index 23ab3a75..fe677dba 100644 --- a/zrpc/internal/clientinterceptors/prometheusinterceptor.go +++ b/zrpc/internal/clientinterceptors/prometheusinterceptor.go @@ -19,7 +19,7 @@ var ( Name: "duration_ms", Help: "rpc client requests duration(ms).", Labels: []string{"method"}, - Buckets: []float64{5, 10, 25, 50, 100, 250, 500, 1000}, + Buckets: []float64{1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000}, }) metricClientReqCodeTotal = metric.NewCounterVec(&metric.CounterVecOpts{ diff --git a/zrpc/internal/serverinterceptors/prometheusinterceptor.go b/zrpc/internal/serverinterceptors/prometheusinterceptor.go index 9fcaaef9..aa22a389 100644 --- a/zrpc/internal/serverinterceptors/prometheusinterceptor.go +++ b/zrpc/internal/serverinterceptors/prometheusinterceptor.go @@ -19,7 +19,7 @@ var ( Name: "duration_ms", Help: "rpc server requests duration(ms).", Labels: []string{"method"}, - Buckets: []float64{5, 10, 25, 50, 100, 250, 500, 1000}, + Buckets: []float64{1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000}, }) metricServerReqCodeTotal = metric.NewCounterVec(&metric.CounterVecOpts{