You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937
  1. // Copyright 2014 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package prometheus
  14. import (
  15. "bytes"
  16. "fmt"
  17. "io/ioutil"
  18. "os"
  19. "path/filepath"
  20. "runtime"
  21. "sort"
  22. "strings"
  23. "sync"
  24. "unicode/utf8"
  25. "github.com/golang/protobuf/proto"
  26. "github.com/prometheus/common/expfmt"
  27. dto "github.com/prometheus/client_model/go"
  28. "github.com/prometheus/client_golang/prometheus/internal"
  29. )
  // Buffer capacities of the channels used to talk to Collectors.
  const (
  	// capMetricChan is the capacity of the channels that receive collected
  	// metrics.
  	capMetricChan = 1000
  	// capDescChan is the capacity of the channels that receive descriptors.
  	capDescChan = 10
  )
  35. // DefaultRegisterer and DefaultGatherer are the implementations of the
  36. // Registerer and Gatherer interface a number of convenience functions in this
  37. // package act on. Initially, both variables point to the same Registry, which
  38. // has a process collector (currently on Linux only, see NewProcessCollector)
  39. // and a Go collector (see NewGoCollector, in particular the note about
  40. // stop-the-world implication with Go versions older than 1.9) already
  41. // registered. This approach to keep default instances as global state mirrors
  42. // the approach of other packages in the Go standard library. Note that there
  43. // are caveats. Change the variables with caution and only if you understand the
  44. // consequences. Users who want to avoid global state altogether should not use
  45. // the convenience functions and act on custom instances instead.
  46. var (
  47. defaultRegistry = NewRegistry()
  48. DefaultRegisterer Registerer = defaultRegistry
  49. DefaultGatherer Gatherer = defaultRegistry
  50. )
  51. func init() {
  52. MustRegister(NewProcessCollector(ProcessCollectorOpts{}))
  53. MustRegister(NewGoCollector())
  54. }
  55. // NewRegistry creates a new vanilla Registry without any Collectors
  56. // pre-registered.
  57. func NewRegistry() *Registry {
  58. return &Registry{
  59. collectorsByID: map[uint64]Collector{},
  60. descIDs: map[uint64]struct{}{},
  61. dimHashesByName: map[string]uint64{},
  62. }
  63. }
  64. // NewPedanticRegistry returns a registry that checks during collection if each
  65. // collected Metric is consistent with its reported Desc, and if the Desc has
  66. // actually been registered with the registry. Unchecked Collectors (those whose
  67. // Describe methed does not yield any descriptors) are excluded from the check.
  68. //
  69. // Usually, a Registry will be happy as long as the union of all collected
  70. // Metrics is consistent and valid even if some metrics are not consistent with
  71. // their own Desc or a Desc provided by their registered Collector. Well-behaved
  72. // Collectors and Metrics will only provide consistent Descs. This Registry is
  73. // useful to test the implementation of Collectors and Metrics.
  74. func NewPedanticRegistry() *Registry {
  75. r := NewRegistry()
  76. r.pedanticChecksEnabled = true
  77. return r
  78. }
  79. // Registerer is the interface for the part of a registry in charge of
  80. // registering and unregistering. Users of custom registries should use
  81. // Registerer as type for registration purposes (rather than the Registry type
  82. // directly). In that way, they are free to use custom Registerer implementation
  83. // (e.g. for testing purposes).
  84. type Registerer interface {
  85. // Register registers a new Collector to be included in metrics
  86. // collection. It returns an error if the descriptors provided by the
  87. // Collector are invalid or if they — in combination with descriptors of
  88. // already registered Collectors — do not fulfill the consistency and
  89. // uniqueness criteria described in the documentation of metric.Desc.
  90. //
  91. // If the provided Collector is equal to a Collector already registered
  92. // (which includes the case of re-registering the same Collector), the
  93. // returned error is an instance of AlreadyRegisteredError, which
  94. // contains the previously registered Collector.
  95. //
  96. // A Collector whose Describe method does not yield any Desc is treated
  97. // as unchecked. Registration will always succeed. No check for
  98. // re-registering (see previous paragraph) is performed. Thus, the
  99. // caller is responsible for not double-registering the same unchecked
  100. // Collector, and for providing a Collector that will not cause
  101. // inconsistent metrics on collection. (This would lead to scrape
  102. // errors.)
  103. Register(Collector) error
  104. // MustRegister works like Register but registers any number of
  105. // Collectors and panics upon the first registration that causes an
  106. // error.
  107. MustRegister(...Collector)
  108. // Unregister unregisters the Collector that equals the Collector passed
  109. // in as an argument. (Two Collectors are considered equal if their
  110. // Describe method yields the same set of descriptors.) The function
  111. // returns whether a Collector was unregistered. Note that an unchecked
  112. // Collector cannot be unregistered (as its Describe method does not
  113. // yield any descriptor).
  114. //
  115. // Note that even after unregistering, it will not be possible to
  116. // register a new Collector that is inconsistent with the unregistered
  117. // Collector, e.g. a Collector collecting metrics with the same name but
  118. // a different help string. The rationale here is that the same registry
  119. // instance must only collect consistent metrics throughout its
  120. // lifetime.
  121. Unregister(Collector) bool
  122. }
  123. // Gatherer is the interface for the part of a registry in charge of gathering
  124. // the collected metrics into a number of MetricFamilies. The Gatherer interface
  125. // comes with the same general implication as described for the Registerer
  126. // interface.
  127. type Gatherer interface {
  128. // Gather calls the Collect method of the registered Collectors and then
  129. // gathers the collected metrics into a lexicographically sorted slice
  130. // of uniquely named MetricFamily protobufs. Gather ensures that the
  131. // returned slice is valid and self-consistent so that it can be used
  132. // for valid exposition. As an exception to the strict consistency
  133. // requirements described for metric.Desc, Gather will tolerate
  134. // different sets of label names for metrics of the same metric family.
  135. //
  136. // Even if an error occurs, Gather attempts to gather as many metrics as
  137. // possible. Hence, if a non-nil error is returned, the returned
  138. // MetricFamily slice could be nil (in case of a fatal error that
  139. // prevented any meaningful metric collection) or contain a number of
  140. // MetricFamily protobufs, some of which might be incomplete, and some
  141. // might be missing altogether. The returned error (which might be a
  142. // MultiError) explains the details. Note that this is mostly useful for
  143. // debugging purposes. If the gathered protobufs are to be used for
  144. // exposition in actual monitoring, it is almost always better to not
  145. // expose an incomplete result and instead disregard the returned
  146. // MetricFamily protobufs in case the returned error is non-nil.
  147. Gather() ([]*dto.MetricFamily, error)
  148. }
  149. // Register registers the provided Collector with the DefaultRegisterer.
  150. //
  151. // Register is a shortcut for DefaultRegisterer.Register(c). See there for more
  152. // details.
  153. func Register(c Collector) error {
  154. return DefaultRegisterer.Register(c)
  155. }
  156. // MustRegister registers the provided Collectors with the DefaultRegisterer and
  157. // panics if any error occurs.
  158. //
  159. // MustRegister is a shortcut for DefaultRegisterer.MustRegister(cs...). See
  160. // there for more details.
  161. func MustRegister(cs ...Collector) {
  162. DefaultRegisterer.MustRegister(cs...)
  163. }
  164. // Unregister removes the registration of the provided Collector from the
  165. // DefaultRegisterer.
  166. //
  167. // Unregister is a shortcut for DefaultRegisterer.Unregister(c). See there for
  168. // more details.
  169. func Unregister(c Collector) bool {
  170. return DefaultRegisterer.Unregister(c)
  171. }
  172. // GathererFunc turns a function into a Gatherer.
  173. type GathererFunc func() ([]*dto.MetricFamily, error)
  174. // Gather implements Gatherer.
  175. func (gf GathererFunc) Gather() ([]*dto.MetricFamily, error) {
  176. return gf()
  177. }
  178. // AlreadyRegisteredError is returned by the Register method if the Collector to
  179. // be registered has already been registered before, or a different Collector
  180. // that collects the same metrics has been registered before. Registration fails
  181. // in that case, but you can detect from the kind of error what has
  182. // happened. The error contains fields for the existing Collector and the
  183. // (rejected) new Collector that equals the existing one. This can be used to
  184. // find out if an equal Collector has been registered before and switch over to
  185. // using the old one, as demonstrated in the example.
  186. type AlreadyRegisteredError struct {
  187. ExistingCollector, NewCollector Collector
  188. }
  189. func (err AlreadyRegisteredError) Error() string {
  190. return "duplicate metrics collector registration attempted"
  191. }
  // MultiError is a slice of errors that implements the error interface
  // itself. A Gatherer uses it to report several errors that occurred while
  // gathering MetricFamilies.
  type MultiError []error

  // Error implements the error interface. An empty MultiError yields the empty
  // string; otherwise the result is a header line stating the number of errors
  // followed by one "* "-prefixed line per contained error.
  func (errs MultiError) Error() string {
  	if len(errs) == 0 {
  		return ""
  	}
  	var out bytes.Buffer
  	fmt.Fprintf(&out, "%d error(s) occurred:", len(errs))
  	for i := range errs {
  		fmt.Fprintf(&out, "\n* %s", errs[i])
  	}
  	return out.String()
  }

  // Append adds err to the MultiError. A nil err is ignored.
  func (errs *MultiError) Append(err error) {
  	if err == nil {
  		return
  	}
  	*errs = append(*errs, err)
  }

  // MaybeUnwrap returns nil if len(errs) is 0, the single contained error if
  // len(errs) is 1, and the MultiError itself in all other cases. It is handy
  // for returning a MultiError only when one is actually needed.
  func (errs MultiError) MaybeUnwrap() error {
  	if len(errs) == 0 {
  		return nil
  	}
  	if len(errs) == 1 {
  		return errs[0]
  	}
  	return errs
  }
  226. // Registry registers Prometheus collectors, collects their metrics, and gathers
  227. // them into MetricFamilies for exposition. It implements both Registerer and
  228. // Gatherer. The zero value is not usable. Create instances with NewRegistry or
  229. // NewPedanticRegistry.
  230. type Registry struct {
  231. mtx sync.RWMutex
  232. collectorsByID map[uint64]Collector // ID is a hash of the descIDs.
  233. descIDs map[uint64]struct{}
  234. dimHashesByName map[string]uint64
  235. uncheckedCollectors []Collector
  236. pedanticChecksEnabled bool
  237. }
  238. // Register implements Registerer.
  239. func (r *Registry) Register(c Collector) error {
  240. var (
  241. descChan = make(chan *Desc, capDescChan)
  242. newDescIDs = map[uint64]struct{}{}
  243. newDimHashesByName = map[string]uint64{}
  244. collectorID uint64 // Just a sum of all desc IDs.
  245. duplicateDescErr error
  246. )
  247. go func() {
  248. c.Describe(descChan)
  249. close(descChan)
  250. }()
  251. r.mtx.Lock()
  252. defer func() {
  253. // Drain channel in case of premature return to not leak a goroutine.
  254. for range descChan {
  255. }
  256. r.mtx.Unlock()
  257. }()
  258. // Conduct various tests...
  259. for desc := range descChan {
  260. // Is the descriptor valid at all?
  261. if desc.err != nil {
  262. return fmt.Errorf("descriptor %s is invalid: %s", desc, desc.err)
  263. }
  264. // Is the descID unique?
  265. // (In other words: Is the fqName + constLabel combination unique?)
  266. if _, exists := r.descIDs[desc.id]; exists {
  267. duplicateDescErr = fmt.Errorf("descriptor %s already exists with the same fully-qualified name and const label values", desc)
  268. }
  269. // If it is not a duplicate desc in this collector, add it to
  270. // the collectorID. (We allow duplicate descs within the same
  271. // collector, but their existence must be a no-op.)
  272. if _, exists := newDescIDs[desc.id]; !exists {
  273. newDescIDs[desc.id] = struct{}{}
  274. collectorID += desc.id
  275. }
  276. // Are all the label names and the help string consistent with
  277. // previous descriptors of the same name?
  278. // First check existing descriptors...
  279. if dimHash, exists := r.dimHashesByName[desc.fqName]; exists {
  280. if dimHash != desc.dimHash {
  281. return fmt.Errorf("a previously registered descriptor with the same fully-qualified name as %s has different label names or a different help string", desc)
  282. }
  283. } else {
  284. // ...then check the new descriptors already seen.
  285. if dimHash, exists := newDimHashesByName[desc.fqName]; exists {
  286. if dimHash != desc.dimHash {
  287. return fmt.Errorf("descriptors reported by collector have inconsistent label names or help strings for the same fully-qualified name, offender is %s", desc)
  288. }
  289. } else {
  290. newDimHashesByName[desc.fqName] = desc.dimHash
  291. }
  292. }
  293. }
  294. // A Collector yielding no Desc at all is considered unchecked.
  295. if len(newDescIDs) == 0 {
  296. r.uncheckedCollectors = append(r.uncheckedCollectors, c)
  297. return nil
  298. }
  299. if existing, exists := r.collectorsByID[collectorID]; exists {
  300. return AlreadyRegisteredError{
  301. ExistingCollector: existing,
  302. NewCollector: c,
  303. }
  304. }
  305. // If the collectorID is new, but at least one of the descs existed
  306. // before, we are in trouble.
  307. if duplicateDescErr != nil {
  308. return duplicateDescErr
  309. }
  310. // Only after all tests have passed, actually register.
  311. r.collectorsByID[collectorID] = c
  312. for hash := range newDescIDs {
  313. r.descIDs[hash] = struct{}{}
  314. }
  315. for name, dimHash := range newDimHashesByName {
  316. r.dimHashesByName[name] = dimHash
  317. }
  318. return nil
  319. }
  320. // Unregister implements Registerer.
  321. func (r *Registry) Unregister(c Collector) bool {
  322. var (
  323. descChan = make(chan *Desc, capDescChan)
  324. descIDs = map[uint64]struct{}{}
  325. collectorID uint64 // Just a sum of the desc IDs.
  326. )
  327. go func() {
  328. c.Describe(descChan)
  329. close(descChan)
  330. }()
  331. for desc := range descChan {
  332. if _, exists := descIDs[desc.id]; !exists {
  333. collectorID += desc.id
  334. descIDs[desc.id] = struct{}{}
  335. }
  336. }
  337. r.mtx.RLock()
  338. if _, exists := r.collectorsByID[collectorID]; !exists {
  339. r.mtx.RUnlock()
  340. return false
  341. }
  342. r.mtx.RUnlock()
  343. r.mtx.Lock()
  344. defer r.mtx.Unlock()
  345. delete(r.collectorsByID, collectorID)
  346. for id := range descIDs {
  347. delete(r.descIDs, id)
  348. }
  349. // dimHashesByName is left untouched as those must be consistent
  350. // throughout the lifetime of a program.
  351. return true
  352. }
  353. // MustRegister implements Registerer.
  354. func (r *Registry) MustRegister(cs ...Collector) {
  355. for _, c := range cs {
  356. if err := r.Register(c); err != nil {
  357. panic(err)
  358. }
  359. }
  360. }
  361. // Gather implements Gatherer.
  362. func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
  363. var (
  364. checkedMetricChan = make(chan Metric, capMetricChan)
  365. uncheckedMetricChan = make(chan Metric, capMetricChan)
  366. metricHashes = map[uint64]struct{}{}
  367. wg sync.WaitGroup
  368. errs MultiError // The collected errors to return in the end.
  369. registeredDescIDs map[uint64]struct{} // Only used for pedantic checks
  370. )
  371. r.mtx.RLock()
  372. goroutineBudget := len(r.collectorsByID) + len(r.uncheckedCollectors)
  373. metricFamiliesByName := make(map[string]*dto.MetricFamily, len(r.dimHashesByName))
  374. checkedCollectors := make(chan Collector, len(r.collectorsByID))
  375. uncheckedCollectors := make(chan Collector, len(r.uncheckedCollectors))
  376. for _, collector := range r.collectorsByID {
  377. checkedCollectors <- collector
  378. }
  379. for _, collector := range r.uncheckedCollectors {
  380. uncheckedCollectors <- collector
  381. }
  382. // In case pedantic checks are enabled, we have to copy the map before
  383. // giving up the RLock.
  384. if r.pedanticChecksEnabled {
  385. registeredDescIDs = make(map[uint64]struct{}, len(r.descIDs))
  386. for id := range r.descIDs {
  387. registeredDescIDs[id] = struct{}{}
  388. }
  389. }
  390. r.mtx.RUnlock()
  391. wg.Add(goroutineBudget)
  392. collectWorker := func() {
  393. for {
  394. select {
  395. case collector := <-checkedCollectors:
  396. collector.Collect(checkedMetricChan)
  397. case collector := <-uncheckedCollectors:
  398. collector.Collect(uncheckedMetricChan)
  399. default:
  400. return
  401. }
  402. wg.Done()
  403. }
  404. }
  405. // Start the first worker now to make sure at least one is running.
  406. go collectWorker()
  407. goroutineBudget--
  408. // Close checkedMetricChan and uncheckedMetricChan once all collectors
  409. // are collected.
  410. go func() {
  411. wg.Wait()
  412. close(checkedMetricChan)
  413. close(uncheckedMetricChan)
  414. }()
  415. // Drain checkedMetricChan and uncheckedMetricChan in case of premature return.
  416. defer func() {
  417. if checkedMetricChan != nil {
  418. for range checkedMetricChan {
  419. }
  420. }
  421. if uncheckedMetricChan != nil {
  422. for range uncheckedMetricChan {
  423. }
  424. }
  425. }()
  426. // Copy the channel references so we can nil them out later to remove
  427. // them from the select statements below.
  428. cmc := checkedMetricChan
  429. umc := uncheckedMetricChan
  430. for {
  431. select {
  432. case metric, ok := <-cmc:
  433. if !ok {
  434. cmc = nil
  435. break
  436. }
  437. errs.Append(processMetric(
  438. metric, metricFamiliesByName,
  439. metricHashes,
  440. registeredDescIDs,
  441. ))
  442. case metric, ok := <-umc:
  443. if !ok {
  444. umc = nil
  445. break
  446. }
  447. errs.Append(processMetric(
  448. metric, metricFamiliesByName,
  449. metricHashes,
  450. nil,
  451. ))
  452. default:
  453. if goroutineBudget <= 0 || len(checkedCollectors)+len(uncheckedCollectors) == 0 {
  454. // All collectors are already being worked on or
  455. // we have already as many goroutines started as
  456. // there are collectors. Do the same as above,
  457. // just without the default.
  458. select {
  459. case metric, ok := <-cmc:
  460. if !ok {
  461. cmc = nil
  462. break
  463. }
  464. errs.Append(processMetric(
  465. metric, metricFamiliesByName,
  466. metricHashes,
  467. registeredDescIDs,
  468. ))
  469. case metric, ok := <-umc:
  470. if !ok {
  471. umc = nil
  472. break
  473. }
  474. errs.Append(processMetric(
  475. metric, metricFamiliesByName,
  476. metricHashes,
  477. nil,
  478. ))
  479. }
  480. break
  481. }
  482. // Start more workers.
  483. go collectWorker()
  484. goroutineBudget--
  485. runtime.Gosched()
  486. }
  487. // Once both checkedMetricChan and uncheckdMetricChan are closed
  488. // and drained, the contraption above will nil out cmc and umc,
  489. // and then we can leave the collect loop here.
  490. if cmc == nil && umc == nil {
  491. break
  492. }
  493. }
  494. return internal.NormalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
  495. }
  496. // WriteToTextfile calls Gather on the provided Gatherer, encodes the result in the
  497. // Prometheus text format, and writes it to a temporary file. Upon success, the
  498. // temporary file is renamed to the provided filename.
  499. //
  500. // This is intended for use with the textfile collector of the node exporter.
  501. // Note that the node exporter expects the filename to be suffixed with ".prom".
  502. func WriteToTextfile(filename string, g Gatherer) error {
  503. tmp, err := ioutil.TempFile(filepath.Dir(filename), filepath.Base(filename))
  504. if err != nil {
  505. return err
  506. }
  507. defer os.Remove(tmp.Name())
  508. mfs, err := g.Gather()
  509. if err != nil {
  510. return err
  511. }
  512. for _, mf := range mfs {
  513. if _, err := expfmt.MetricFamilyToText(tmp, mf); err != nil {
  514. return err
  515. }
  516. }
  517. if err := tmp.Close(); err != nil {
  518. return err
  519. }
  520. if err := os.Chmod(tmp.Name(), 0644); err != nil {
  521. return err
  522. }
  523. return os.Rename(tmp.Name(), filename)
  524. }
  525. // processMetric is an internal helper method only used by the Gather method.
  526. func processMetric(
  527. metric Metric,
  528. metricFamiliesByName map[string]*dto.MetricFamily,
  529. metricHashes map[uint64]struct{},
  530. registeredDescIDs map[uint64]struct{},
  531. ) error {
  532. desc := metric.Desc()
  533. // Wrapped metrics collected by an unchecked Collector can have an
  534. // invalid Desc.
  535. if desc.err != nil {
  536. return desc.err
  537. }
  538. dtoMetric := &dto.Metric{}
  539. if err := metric.Write(dtoMetric); err != nil {
  540. return fmt.Errorf("error collecting metric %v: %s", desc, err)
  541. }
  542. metricFamily, ok := metricFamiliesByName[desc.fqName]
  543. if ok { // Existing name.
  544. if metricFamily.GetHelp() != desc.help {
  545. return fmt.Errorf(
  546. "collected metric %s %s has help %q but should have %q",
  547. desc.fqName, dtoMetric, desc.help, metricFamily.GetHelp(),
  548. )
  549. }
  550. // TODO(beorn7): Simplify switch once Desc has type.
  551. switch metricFamily.GetType() {
  552. case dto.MetricType_COUNTER:
  553. if dtoMetric.Counter == nil {
  554. return fmt.Errorf(
  555. "collected metric %s %s should be a Counter",
  556. desc.fqName, dtoMetric,
  557. )
  558. }
  559. case dto.MetricType_GAUGE:
  560. if dtoMetric.Gauge == nil {
  561. return fmt.Errorf(
  562. "collected metric %s %s should be a Gauge",
  563. desc.fqName, dtoMetric,
  564. )
  565. }
  566. case dto.MetricType_SUMMARY:
  567. if dtoMetric.Summary == nil {
  568. return fmt.Errorf(
  569. "collected metric %s %s should be a Summary",
  570. desc.fqName, dtoMetric,
  571. )
  572. }
  573. case dto.MetricType_UNTYPED:
  574. if dtoMetric.Untyped == nil {
  575. return fmt.Errorf(
  576. "collected metric %s %s should be Untyped",
  577. desc.fqName, dtoMetric,
  578. )
  579. }
  580. case dto.MetricType_HISTOGRAM:
  581. if dtoMetric.Histogram == nil {
  582. return fmt.Errorf(
  583. "collected metric %s %s should be a Histogram",
  584. desc.fqName, dtoMetric,
  585. )
  586. }
  587. default:
  588. panic("encountered MetricFamily with invalid type")
  589. }
  590. } else { // New name.
  591. metricFamily = &dto.MetricFamily{}
  592. metricFamily.Name = proto.String(desc.fqName)
  593. metricFamily.Help = proto.String(desc.help)
  594. // TODO(beorn7): Simplify switch once Desc has type.
  595. switch {
  596. case dtoMetric.Gauge != nil:
  597. metricFamily.Type = dto.MetricType_GAUGE.Enum()
  598. case dtoMetric.Counter != nil:
  599. metricFamily.Type = dto.MetricType_COUNTER.Enum()
  600. case dtoMetric.Summary != nil:
  601. metricFamily.Type = dto.MetricType_SUMMARY.Enum()
  602. case dtoMetric.Untyped != nil:
  603. metricFamily.Type = dto.MetricType_UNTYPED.Enum()
  604. case dtoMetric.Histogram != nil:
  605. metricFamily.Type = dto.MetricType_HISTOGRAM.Enum()
  606. default:
  607. return fmt.Errorf("empty metric collected: %s", dtoMetric)
  608. }
  609. if err := checkSuffixCollisions(metricFamily, metricFamiliesByName); err != nil {
  610. return err
  611. }
  612. metricFamiliesByName[desc.fqName] = metricFamily
  613. }
  614. if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes); err != nil {
  615. return err
  616. }
  617. if registeredDescIDs != nil {
  618. // Is the desc registered at all?
  619. if _, exist := registeredDescIDs[desc.id]; !exist {
  620. return fmt.Errorf(
  621. "collected metric %s %s with unregistered descriptor %s",
  622. metricFamily.GetName(), dtoMetric, desc,
  623. )
  624. }
  625. if err := checkDescConsistency(metricFamily, dtoMetric, desc); err != nil {
  626. return err
  627. }
  628. }
  629. metricFamily.Metric = append(metricFamily.Metric, dtoMetric)
  630. return nil
  631. }
  632. // Gatherers is a slice of Gatherer instances that implements the Gatherer
  633. // interface itself. Its Gather method calls Gather on all Gatherers in the
  634. // slice in order and returns the merged results. Errors returned from the
  635. // Gather calls are all returned in a flattened MultiError. Duplicate and
  636. // inconsistent Metrics are skipped (first occurrence in slice order wins) and
  637. // reported in the returned error.
  638. //
  639. // Gatherers can be used to merge the Gather results from multiple
  640. // Registries. It also provides a way to directly inject existing MetricFamily
  641. // protobufs into the gathering by creating a custom Gatherer with a Gather
  642. // method that simply returns the existing MetricFamily protobufs. Note that no
  643. // registration is involved (in contrast to Collector registration), so
  644. // obviously registration-time checks cannot happen. Any inconsistencies between
  645. // the gathered MetricFamilies are reported as errors by the Gather method, and
  646. // inconsistent Metrics are dropped. Invalid parts of the MetricFamilies
  647. // (e.g. syntactically invalid metric or label names) will go undetected.
  648. type Gatherers []Gatherer
  649. // Gather implements Gatherer.
  650. func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
  651. var (
  652. metricFamiliesByName = map[string]*dto.MetricFamily{}
  653. metricHashes = map[uint64]struct{}{}
  654. errs MultiError // The collected errors to return in the end.
  655. )
  656. for i, g := range gs {
  657. mfs, err := g.Gather()
  658. if err != nil {
  659. if multiErr, ok := err.(MultiError); ok {
  660. for _, err := range multiErr {
  661. errs = append(errs, fmt.Errorf("[from Gatherer #%d] %s", i+1, err))
  662. }
  663. } else {
  664. errs = append(errs, fmt.Errorf("[from Gatherer #%d] %s", i+1, err))
  665. }
  666. }
  667. for _, mf := range mfs {
  668. existingMF, exists := metricFamiliesByName[mf.GetName()]
  669. if exists {
  670. if existingMF.GetHelp() != mf.GetHelp() {
  671. errs = append(errs, fmt.Errorf(
  672. "gathered metric family %s has help %q but should have %q",
  673. mf.GetName(), mf.GetHelp(), existingMF.GetHelp(),
  674. ))
  675. continue
  676. }
  677. if existingMF.GetType() != mf.GetType() {
  678. errs = append(errs, fmt.Errorf(
  679. "gathered metric family %s has type %s but should have %s",
  680. mf.GetName(), mf.GetType(), existingMF.GetType(),
  681. ))
  682. continue
  683. }
  684. } else {
  685. existingMF = &dto.MetricFamily{}
  686. existingMF.Name = mf.Name
  687. existingMF.Help = mf.Help
  688. existingMF.Type = mf.Type
  689. if err := checkSuffixCollisions(existingMF, metricFamiliesByName); err != nil {
  690. errs = append(errs, err)
  691. continue
  692. }
  693. metricFamiliesByName[mf.GetName()] = existingMF
  694. }
  695. for _, m := range mf.Metric {
  696. if err := checkMetricConsistency(existingMF, m, metricHashes); err != nil {
  697. errs = append(errs, err)
  698. continue
  699. }
  700. existingMF.Metric = append(existingMF.Metric, m)
  701. }
  702. }
  703. }
  704. return internal.NormalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
  705. }
  706. // checkSuffixCollisions checks for collisions with the “magic” suffixes the
  707. // Prometheus text format and the internal metric representation of the
  708. // Prometheus server add while flattening Summaries and Histograms.
  709. func checkSuffixCollisions(mf *dto.MetricFamily, mfs map[string]*dto.MetricFamily) error {
  710. var (
  711. newName = mf.GetName()
  712. newType = mf.GetType()
  713. newNameWithoutSuffix = ""
  714. )
  715. switch {
  716. case strings.HasSuffix(newName, "_count"):
  717. newNameWithoutSuffix = newName[:len(newName)-6]
  718. case strings.HasSuffix(newName, "_sum"):
  719. newNameWithoutSuffix = newName[:len(newName)-4]
  720. case strings.HasSuffix(newName, "_bucket"):
  721. newNameWithoutSuffix = newName[:len(newName)-7]
  722. }
  723. if newNameWithoutSuffix != "" {
  724. if existingMF, ok := mfs[newNameWithoutSuffix]; ok {
  725. switch existingMF.GetType() {
  726. case dto.MetricType_SUMMARY:
  727. if !strings.HasSuffix(newName, "_bucket") {
  728. return fmt.Errorf(
  729. "collected metric named %q collides with previously collected summary named %q",
  730. newName, newNameWithoutSuffix,
  731. )
  732. }
  733. case dto.MetricType_HISTOGRAM:
  734. return fmt.Errorf(
  735. "collected metric named %q collides with previously collected histogram named %q",
  736. newName, newNameWithoutSuffix,
  737. )
  738. }
  739. }
  740. }
  741. if newType == dto.MetricType_SUMMARY || newType == dto.MetricType_HISTOGRAM {
  742. if _, ok := mfs[newName+"_count"]; ok {
  743. return fmt.Errorf(
  744. "collected histogram or summary named %q collides with previously collected metric named %q",
  745. newName, newName+"_count",
  746. )
  747. }
  748. if _, ok := mfs[newName+"_sum"]; ok {
  749. return fmt.Errorf(
  750. "collected histogram or summary named %q collides with previously collected metric named %q",
  751. newName, newName+"_sum",
  752. )
  753. }
  754. }
  755. if newType == dto.MetricType_HISTOGRAM {
  756. if _, ok := mfs[newName+"_bucket"]; ok {
  757. return fmt.Errorf(
  758. "collected histogram named %q collides with previously collected metric named %q",
  759. newName, newName+"_bucket",
  760. )
  761. }
  762. }
  763. return nil
  764. }
  765. // checkMetricConsistency checks if the provided Metric is consistent with the
  766. // provided MetricFamily. It also hashes the Metric labels and the MetricFamily
  767. // name. If the resulting hash is already in the provided metricHashes, an error
  768. // is returned. If not, it is added to metricHashes.
  769. func checkMetricConsistency(
  770. metricFamily *dto.MetricFamily,
  771. dtoMetric *dto.Metric,
  772. metricHashes map[uint64]struct{},
  773. ) error {
  774. name := metricFamily.GetName()
  775. // Type consistency with metric family.
  776. if metricFamily.GetType() == dto.MetricType_GAUGE && dtoMetric.Gauge == nil ||
  777. metricFamily.GetType() == dto.MetricType_COUNTER && dtoMetric.Counter == nil ||
  778. metricFamily.GetType() == dto.MetricType_SUMMARY && dtoMetric.Summary == nil ||
  779. metricFamily.GetType() == dto.MetricType_HISTOGRAM && dtoMetric.Histogram == nil ||
  780. metricFamily.GetType() == dto.MetricType_UNTYPED && dtoMetric.Untyped == nil {
  781. return fmt.Errorf(
  782. "collected metric %q { %s} is not a %s",
  783. name, dtoMetric, metricFamily.GetType(),
  784. )
  785. }
  786. previousLabelName := ""
  787. for _, labelPair := range dtoMetric.GetLabel() {
  788. labelName := labelPair.GetName()
  789. if labelName == previousLabelName {
  790. return fmt.Errorf(
  791. "collected metric %q { %s} has two or more labels with the same name: %s",
  792. name, dtoMetric, labelName,
  793. )
  794. }
  795. if !checkLabelName(labelName) {
  796. return fmt.Errorf(
  797. "collected metric %q { %s} has a label with an invalid name: %s",
  798. name, dtoMetric, labelName,
  799. )
  800. }
  801. if dtoMetric.Summary != nil && labelName == quantileLabel {
  802. return fmt.Errorf(
  803. "collected metric %q { %s} must not have an explicit %q label",
  804. name, dtoMetric, quantileLabel,
  805. )
  806. }
  807. if !utf8.ValidString(labelPair.GetValue()) {
  808. return fmt.Errorf(
  809. "collected metric %q { %s} has a label named %q whose value is not utf8: %#v",
  810. name, dtoMetric, labelName, labelPair.GetValue())
  811. }
  812. previousLabelName = labelName
  813. }
  814. // Is the metric unique (i.e. no other metric with the same name and the same labels)?
  815. h := hashNew()
  816. h = hashAdd(h, name)
  817. h = hashAddByte(h, separatorByte)
  818. // Make sure label pairs are sorted. We depend on it for the consistency
  819. // check.
  820. if !sort.IsSorted(labelPairSorter(dtoMetric.Label)) {
  821. // We cannot sort dtoMetric.Label in place as it is immutable by contract.
  822. copiedLabels := make([]*dto.LabelPair, len(dtoMetric.Label))
  823. copy(copiedLabels, dtoMetric.Label)
  824. sort.Sort(labelPairSorter(copiedLabels))
  825. dtoMetric.Label = copiedLabels
  826. }
  827. for _, lp := range dtoMetric.Label {
  828. h = hashAdd(h, lp.GetName())
  829. h = hashAddByte(h, separatorByte)
  830. h = hashAdd(h, lp.GetValue())
  831. h = hashAddByte(h, separatorByte)
  832. }
  833. if _, exists := metricHashes[h]; exists {
  834. return fmt.Errorf(
  835. "collected metric %q { %s} was collected before with the same name and label values",
  836. name, dtoMetric,
  837. )
  838. }
  839. metricHashes[h] = struct{}{}
  840. return nil
  841. }
  842. func checkDescConsistency(
  843. metricFamily *dto.MetricFamily,
  844. dtoMetric *dto.Metric,
  845. desc *Desc,
  846. ) error {
  847. // Desc help consistency with metric family help.
  848. if metricFamily.GetHelp() != desc.help {
  849. return fmt.Errorf(
  850. "collected metric %s %s has help %q but should have %q",
  851. metricFamily.GetName(), dtoMetric, metricFamily.GetHelp(), desc.help,
  852. )
  853. }
  854. // Is the desc consistent with the content of the metric?
  855. lpsFromDesc := make([]*dto.LabelPair, len(desc.constLabelPairs), len(dtoMetric.Label))
  856. copy(lpsFromDesc, desc.constLabelPairs)
  857. for _, l := range desc.variableLabels {
  858. lpsFromDesc = append(lpsFromDesc, &dto.LabelPair{
  859. Name: proto.String(l),
  860. })
  861. }
  862. if len(lpsFromDesc) != len(dtoMetric.Label) {
  863. return fmt.Errorf(
  864. "labels in collected metric %s %s are inconsistent with descriptor %s",
  865. metricFamily.GetName(), dtoMetric, desc,
  866. )
  867. }
  868. sort.Sort(labelPairSorter(lpsFromDesc))
  869. for i, lpFromDesc := range lpsFromDesc {
  870. lpFromMetric := dtoMetric.Label[i]
  871. if lpFromDesc.GetName() != lpFromMetric.GetName() ||
  872. lpFromDesc.Value != nil && lpFromDesc.GetValue() != lpFromMetric.GetValue() {
  873. return fmt.Errorf(
  874. "labels in collected metric %s %s are inconsistent with descriptor %s",
  875. metricFamily.GetName(), dtoMetric, desc,
  876. )
  877. }
  878. }
  879. return nil
  880. }