您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

go_collector.go 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. // Copyright 2018 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package prometheus
  14. import (
  15. "runtime"
  16. "runtime/debug"
  17. "sync"
  18. "time"
  19. )
  20. type goCollector struct {
  21. goroutinesDesc *Desc
  22. threadsDesc *Desc
  23. gcDesc *Desc
  24. goInfoDesc *Desc
  25. // ms... are memstats related.
  26. msLast *runtime.MemStats // Previously collected memstats.
  27. msLastTimestamp time.Time
  28. msMtx sync.Mutex // Protects msLast and msLastTimestamp.
  29. msMetrics memStatsMetrics
  30. msRead func(*runtime.MemStats) // For mocking in tests.
  31. msMaxWait time.Duration // Wait time for fresh memstats.
  32. msMaxAge time.Duration // Maximum allowed age of old memstats.
  33. }
  34. // NewGoCollector returns a collector which exports metrics about the current Go
  35. // process. This includes memory stats. To collect those, runtime.ReadMemStats
  36. // is called. This requires to “stop the world”, which usually only happens for
  37. // garbage collection (GC). Take the following implications into account when
  38. // deciding whether to use the Go collector:
  39. //
  40. // 1. The performance impact of stopping the world is the more relevant the more
  41. // frequently metrics are collected. However, with Go1.9 or later the
  42. // stop-the-world time per metrics collection is very short (~25µs) so that the
  43. // performance impact will only matter in rare cases. However, with older Go
  44. // versions, the stop-the-world duration depends on the heap size and can be
  45. // quite significant (~1.7 ms/GiB as per
  46. // https://go-review.googlesource.com/c/go/+/34937).
  47. //
  48. // 2. During an ongoing GC, nothing else can stop the world. Therefore, if the
  49. // metrics collection happens to coincide with GC, it will only complete after
  50. // GC has finished. Usually, GC is fast enough to not cause problems. However,
  51. // with a very large heap, GC might take multiple seconds, which is enough to
  52. // cause scrape timeouts in common setups. To avoid this problem, the Go
  53. // collector will use the memstats from a previous collection if
  54. // runtime.ReadMemStats takes more than 1s. However, if there are no previously
  55. // collected memstats, or their collection is more than 5m ago, the collection
  56. // will block until runtime.ReadMemStats succeeds. (The problem might be solved
  57. // in Go1.13, see https://github.com/golang/go/issues/19812 for the related Go
  58. // issue.)
  59. func NewGoCollector() Collector {
  60. return &goCollector{
  61. goroutinesDesc: NewDesc(
  62. "go_goroutines",
  63. "Number of goroutines that currently exist.",
  64. nil, nil),
  65. threadsDesc: NewDesc(
  66. "go_threads",
  67. "Number of OS threads created.",
  68. nil, nil),
  69. gcDesc: NewDesc(
  70. "go_gc_duration_seconds",
  71. "A summary of the GC invocation durations.",
  72. nil, nil),
  73. goInfoDesc: NewDesc(
  74. "go_info",
  75. "Information about the Go environment.",
  76. nil, Labels{"version": runtime.Version()}),
  77. msLast: &runtime.MemStats{},
  78. msRead: runtime.ReadMemStats,
  79. msMaxWait: time.Second,
  80. msMaxAge: 5 * time.Minute,
  81. msMetrics: memStatsMetrics{
  82. {
  83. desc: NewDesc(
  84. memstatNamespace("alloc_bytes"),
  85. "Number of bytes allocated and still in use.",
  86. nil, nil,
  87. ),
  88. eval: func(ms *runtime.MemStats) float64 { return float64(ms.Alloc) },
  89. valType: GaugeValue,
  90. }, {
  91. desc: NewDesc(
  92. memstatNamespace("alloc_bytes_total"),
  93. "Total number of bytes allocated, even if freed.",
  94. nil, nil,
  95. ),
  96. eval: func(ms *runtime.MemStats) float64 { return float64(ms.TotalAlloc) },
  97. valType: CounterValue,
  98. }, {
  99. desc: NewDesc(
  100. memstatNamespace("sys_bytes"),
  101. "Number of bytes obtained from system.",
  102. nil, nil,
  103. ),
  104. eval: func(ms *runtime.MemStats) float64 { return float64(ms.Sys) },
  105. valType: GaugeValue,
  106. }, {
  107. desc: NewDesc(
  108. memstatNamespace("lookups_total"),
  109. "Total number of pointer lookups.",
  110. nil, nil,
  111. ),
  112. eval: func(ms *runtime.MemStats) float64 { return float64(ms.Lookups) },
  113. valType: CounterValue,
  114. }, {
  115. desc: NewDesc(
  116. memstatNamespace("mallocs_total"),
  117. "Total number of mallocs.",
  118. nil, nil,
  119. ),
  120. eval: func(ms *runtime.MemStats) float64 { return float64(ms.Mallocs) },
  121. valType: CounterValue,
  122. }, {
  123. desc: NewDesc(
  124. memstatNamespace("frees_total"),
  125. "Total number of frees.",
  126. nil, nil,
  127. ),
  128. eval: func(ms *runtime.MemStats) float64 { return float64(ms.Frees) },
  129. valType: CounterValue,
  130. }, {
  131. desc: NewDesc(
  132. memstatNamespace("heap_alloc_bytes"),
  133. "Number of heap bytes allocated and still in use.",
  134. nil, nil,
  135. ),
  136. eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapAlloc) },
  137. valType: GaugeValue,
  138. }, {
  139. desc: NewDesc(
  140. memstatNamespace("heap_sys_bytes"),
  141. "Number of heap bytes obtained from system.",
  142. nil, nil,
  143. ),
  144. eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapSys) },
  145. valType: GaugeValue,
  146. }, {
  147. desc: NewDesc(
  148. memstatNamespace("heap_idle_bytes"),
  149. "Number of heap bytes waiting to be used.",
  150. nil, nil,
  151. ),
  152. eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapIdle) },
  153. valType: GaugeValue,
  154. }, {
  155. desc: NewDesc(
  156. memstatNamespace("heap_inuse_bytes"),
  157. "Number of heap bytes that are in use.",
  158. nil, nil,
  159. ),
  160. eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapInuse) },
  161. valType: GaugeValue,
  162. }, {
  163. desc: NewDesc(
  164. memstatNamespace("heap_released_bytes"),
  165. "Number of heap bytes released to OS.",
  166. nil, nil,
  167. ),
  168. eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapReleased) },
  169. valType: GaugeValue,
  170. }, {
  171. desc: NewDesc(
  172. memstatNamespace("heap_objects"),
  173. "Number of allocated objects.",
  174. nil, nil,
  175. ),
  176. eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapObjects) },
  177. valType: GaugeValue,
  178. }, {
  179. desc: NewDesc(
  180. memstatNamespace("stack_inuse_bytes"),
  181. "Number of bytes in use by the stack allocator.",
  182. nil, nil,
  183. ),
  184. eval: func(ms *runtime.MemStats) float64 { return float64(ms.StackInuse) },
  185. valType: GaugeValue,
  186. }, {
  187. desc: NewDesc(
  188. memstatNamespace("stack_sys_bytes"),
  189. "Number of bytes obtained from system for stack allocator.",
  190. nil, nil,
  191. ),
  192. eval: func(ms *runtime.MemStats) float64 { return float64(ms.StackSys) },
  193. valType: GaugeValue,
  194. }, {
  195. desc: NewDesc(
  196. memstatNamespace("mspan_inuse_bytes"),
  197. "Number of bytes in use by mspan structures.",
  198. nil, nil,
  199. ),
  200. eval: func(ms *runtime.MemStats) float64 { return float64(ms.MSpanInuse) },
  201. valType: GaugeValue,
  202. }, {
  203. desc: NewDesc(
  204. memstatNamespace("mspan_sys_bytes"),
  205. "Number of bytes used for mspan structures obtained from system.",
  206. nil, nil,
  207. ),
  208. eval: func(ms *runtime.MemStats) float64 { return float64(ms.MSpanSys) },
  209. valType: GaugeValue,
  210. }, {
  211. desc: NewDesc(
  212. memstatNamespace("mcache_inuse_bytes"),
  213. "Number of bytes in use by mcache structures.",
  214. nil, nil,
  215. ),
  216. eval: func(ms *runtime.MemStats) float64 { return float64(ms.MCacheInuse) },
  217. valType: GaugeValue,
  218. }, {
  219. desc: NewDesc(
  220. memstatNamespace("mcache_sys_bytes"),
  221. "Number of bytes used for mcache structures obtained from system.",
  222. nil, nil,
  223. ),
  224. eval: func(ms *runtime.MemStats) float64 { return float64(ms.MCacheSys) },
  225. valType: GaugeValue,
  226. }, {
  227. desc: NewDesc(
  228. memstatNamespace("buck_hash_sys_bytes"),
  229. "Number of bytes used by the profiling bucket hash table.",
  230. nil, nil,
  231. ),
  232. eval: func(ms *runtime.MemStats) float64 { return float64(ms.BuckHashSys) },
  233. valType: GaugeValue,
  234. }, {
  235. desc: NewDesc(
  236. memstatNamespace("gc_sys_bytes"),
  237. "Number of bytes used for garbage collection system metadata.",
  238. nil, nil,
  239. ),
  240. eval: func(ms *runtime.MemStats) float64 { return float64(ms.GCSys) },
  241. valType: GaugeValue,
  242. }, {
  243. desc: NewDesc(
  244. memstatNamespace("other_sys_bytes"),
  245. "Number of bytes used for other system allocations.",
  246. nil, nil,
  247. ),
  248. eval: func(ms *runtime.MemStats) float64 { return float64(ms.OtherSys) },
  249. valType: GaugeValue,
  250. }, {
  251. desc: NewDesc(
  252. memstatNamespace("next_gc_bytes"),
  253. "Number of heap bytes when next garbage collection will take place.",
  254. nil, nil,
  255. ),
  256. eval: func(ms *runtime.MemStats) float64 { return float64(ms.NextGC) },
  257. valType: GaugeValue,
  258. }, {
  259. desc: NewDesc(
  260. memstatNamespace("last_gc_time_seconds"),
  261. "Number of seconds since 1970 of last garbage collection.",
  262. nil, nil,
  263. ),
  264. eval: func(ms *runtime.MemStats) float64 { return float64(ms.LastGC) / 1e9 },
  265. valType: GaugeValue,
  266. }, {
  267. desc: NewDesc(
  268. memstatNamespace("gc_cpu_fraction"),
  269. "The fraction of this program's available CPU time used by the GC since the program started.",
  270. nil, nil,
  271. ),
  272. eval: func(ms *runtime.MemStats) float64 { return ms.GCCPUFraction },
  273. valType: GaugeValue,
  274. },
  275. },
  276. }
  277. }
  278. func memstatNamespace(s string) string {
  279. return "go_memstats_" + s
  280. }
  281. // Describe returns all descriptions of the collector.
  282. func (c *goCollector) Describe(ch chan<- *Desc) {
  283. ch <- c.goroutinesDesc
  284. ch <- c.threadsDesc
  285. ch <- c.gcDesc
  286. ch <- c.goInfoDesc
  287. for _, i := range c.msMetrics {
  288. ch <- i.desc
  289. }
  290. }
  291. // Collect returns the current state of all metrics of the collector.
  292. func (c *goCollector) Collect(ch chan<- Metric) {
  293. var (
  294. ms = &runtime.MemStats{}
  295. done = make(chan struct{})
  296. )
  297. // Start reading memstats first as it might take a while.
  298. go func() {
  299. c.msRead(ms)
  300. c.msMtx.Lock()
  301. c.msLast = ms
  302. c.msLastTimestamp = time.Now()
  303. c.msMtx.Unlock()
  304. close(done)
  305. }()
  306. ch <- MustNewConstMetric(c.goroutinesDesc, GaugeValue, float64(runtime.NumGoroutine()))
  307. n, _ := runtime.ThreadCreateProfile(nil)
  308. ch <- MustNewConstMetric(c.threadsDesc, GaugeValue, float64(n))
  309. var stats debug.GCStats
  310. stats.PauseQuantiles = make([]time.Duration, 5)
  311. debug.ReadGCStats(&stats)
  312. quantiles := make(map[float64]float64)
  313. for idx, pq := range stats.PauseQuantiles[1:] {
  314. quantiles[float64(idx+1)/float64(len(stats.PauseQuantiles)-1)] = pq.Seconds()
  315. }
  316. quantiles[0.0] = stats.PauseQuantiles[0].Seconds()
  317. ch <- MustNewConstSummary(c.gcDesc, uint64(stats.NumGC), stats.PauseTotal.Seconds(), quantiles)
  318. ch <- MustNewConstMetric(c.goInfoDesc, GaugeValue, 1)
  319. timer := time.NewTimer(c.msMaxWait)
  320. select {
  321. case <-done: // Our own ReadMemStats succeeded in time. Use it.
  322. timer.Stop() // Important for high collection frequencies to not pile up timers.
  323. c.msCollect(ch, ms)
  324. return
  325. case <-timer.C: // Time out, use last memstats if possible. Continue below.
  326. }
  327. c.msMtx.Lock()
  328. if time.Since(c.msLastTimestamp) < c.msMaxAge {
  329. // Last memstats are recent enough. Collect from them under the lock.
  330. c.msCollect(ch, c.msLast)
  331. c.msMtx.Unlock()
  332. return
  333. }
  334. // If we are here, the last memstats are too old or don't exist. We have
  335. // to wait until our own ReadMemStats finally completes. For that to
  336. // happen, we have to release the lock.
  337. c.msMtx.Unlock()
  338. <-done
  339. c.msCollect(ch, ms)
  340. }
  341. func (c *goCollector) msCollect(ch chan<- Metric, ms *runtime.MemStats) {
  342. for _, i := range c.msMetrics {
  343. ch <- MustNewConstMetric(i.desc, i.valType, i.eval(ms))
  344. }
  345. }
  346. // memStatsMetrics provide description, value, and value type for memstat metrics.
  347. type memStatsMetrics []struct {
  348. desc *Desc
  349. eval func(*runtime.MemStats) float64
  350. valType ValueType
  351. }