Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

manager_stacktraces.go 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. // Copyright 2022 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package process
  4. import (
  5. "fmt"
  6. "io"
  7. "runtime/pprof"
  8. "sort"
  9. "time"
  10. "github.com/google/pprof/profile"
  11. )
  12. // StackEntry is an entry on a stacktrace
  13. type StackEntry struct {
  14. Function string
  15. File string
  16. Line int
  17. }
  18. // Label represents a pprof label assigned to goroutine stack
  19. type Label struct {
  20. Name string
  21. Value string
  22. }
  23. // Stack is a stacktrace relating to a goroutine. (Multiple goroutines may have the same stacktrace)
  24. type Stack struct {
  25. Count int64 // Number of goroutines with this stack trace
  26. Description string
  27. Labels []*Label `json:",omitempty"`
  28. Entry []*StackEntry `json:",omitempty"`
  29. }
  30. // A Process is a combined representation of a Process and a Stacktrace for the goroutines associated with it
  31. type Process struct {
  32. PID IDType
  33. ParentPID IDType
  34. Description string
  35. Start time.Time
  36. Type string
  37. Children []*Process `json:",omitempty"`
  38. Stacks []*Stack `json:",omitempty"`
  39. }
  40. // Processes gets the processes in a thread safe manner
  41. func (pm *Manager) Processes(flat, noSystem bool) ([]*Process, int) {
  42. pm.mutex.Lock()
  43. processCount := len(pm.processMap)
  44. processes := make([]*Process, 0, len(pm.processMap))
  45. if flat {
  46. for _, process := range pm.processMap {
  47. if noSystem && process.Type == SystemProcessType {
  48. continue
  49. }
  50. processes = append(processes, process.toProcess())
  51. }
  52. } else {
  53. // We need our own processMap
  54. processMap := map[IDType]*Process{}
  55. for _, internalProcess := range pm.processMap {
  56. process, ok := processMap[internalProcess.PID]
  57. if !ok {
  58. process = internalProcess.toProcess()
  59. processMap[process.PID] = process
  60. }
  61. // Check its parent
  62. if process.ParentPID == "" {
  63. processes = append(processes, process)
  64. continue
  65. }
  66. internalParentProcess, ok := pm.processMap[internalProcess.ParentPID]
  67. if ok {
  68. parentProcess, ok := processMap[process.ParentPID]
  69. if !ok {
  70. parentProcess = internalParentProcess.toProcess()
  71. processMap[parentProcess.PID] = parentProcess
  72. }
  73. parentProcess.Children = append(parentProcess.Children, process)
  74. continue
  75. }
  76. processes = append(processes, process)
  77. }
  78. }
  79. pm.mutex.Unlock()
  80. if !flat && noSystem {
  81. for i := 0; i < len(processes); i++ {
  82. process := processes[i]
  83. if process.Type != SystemProcessType {
  84. continue
  85. }
  86. processes[len(processes)-1], processes[i] = processes[i], processes[len(processes)-1]
  87. processes = append(processes[:len(processes)-1], process.Children...)
  88. i--
  89. }
  90. }
  91. // Sort by process' start time. Oldest process appears first.
  92. sort.Slice(processes, func(i, j int) bool {
  93. left, right := processes[i], processes[j]
  94. return left.Start.Before(right.Start)
  95. })
  96. return processes, processCount
  97. }
  98. // ProcessStacktraces gets the processes and stacktraces in a thread safe manner
  99. func (pm *Manager) ProcessStacktraces(flat, noSystem bool) ([]*Process, int, int64, error) {
  100. var stacks *profile.Profile
  101. var err error
  102. // We cannot use the pm.ProcessMap here because we will release the mutex ...
  103. processMap := map[IDType]*Process{}
  104. var processCount int
  105. // Lock the manager
  106. pm.mutex.Lock()
  107. processCount = len(pm.processMap)
  108. // Add a defer to unlock in case there is a panic
  109. unlocked := false
  110. defer func() {
  111. if !unlocked {
  112. pm.mutex.Unlock()
  113. }
  114. }()
  115. processes := make([]*Process, 0, len(pm.processMap))
  116. if flat {
  117. for _, internalProcess := range pm.processMap {
  118. process := internalProcess.toProcess()
  119. processMap[process.PID] = process
  120. if noSystem && internalProcess.Type == SystemProcessType {
  121. continue
  122. }
  123. processes = append(processes, process)
  124. }
  125. } else {
  126. for _, internalProcess := range pm.processMap {
  127. process, ok := processMap[internalProcess.PID]
  128. if !ok {
  129. process = internalProcess.toProcess()
  130. processMap[process.PID] = process
  131. }
  132. // Check its parent
  133. if process.ParentPID == "" {
  134. processes = append(processes, process)
  135. continue
  136. }
  137. internalParentProcess, ok := pm.processMap[internalProcess.ParentPID]
  138. if ok {
  139. parentProcess, ok := processMap[process.ParentPID]
  140. if !ok {
  141. parentProcess = internalParentProcess.toProcess()
  142. processMap[parentProcess.PID] = parentProcess
  143. }
  144. parentProcess.Children = append(parentProcess.Children, process)
  145. continue
  146. }
  147. processes = append(processes, process)
  148. }
  149. }
  150. // Now from within the lock we need to get the goroutines.
  151. // Why? If we release the lock then between between filling the above map and getting
  152. // the stacktraces another process could be created which would then look like a dead process below
  153. reader, writer := io.Pipe()
  154. defer reader.Close()
  155. go func() {
  156. err := pprof.Lookup("goroutine").WriteTo(writer, 0)
  157. _ = writer.CloseWithError(err)
  158. }()
  159. stacks, err = profile.Parse(reader)
  160. if err != nil {
  161. return nil, 0, 0, err
  162. }
  163. // Unlock the mutex
  164. pm.mutex.Unlock()
  165. unlocked = true
  166. goroutineCount := int64(0)
  167. // Now walk through the "Sample" slice in the goroutines stack
  168. for _, sample := range stacks.Sample {
  169. // In the "goroutine" pprof profile each sample represents one or more goroutines
  170. // with the same labels and stacktraces.
  171. // We will represent each goroutine by a `Stack`
  172. stack := &Stack{}
  173. // Add the non-process associated labels from the goroutine sample to the Stack
  174. for name, value := range sample.Label {
  175. if name == DescriptionPProfLabel || name == PIDPProfLabel || (!flat && name == PPIDPProfLabel) || name == ProcessTypePProfLabel {
  176. continue
  177. }
  178. // Labels from the "goroutine" pprof profile only have one value.
  179. // This is because the underlying representation is a map[string]string
  180. if len(value) != 1 {
  181. // Unexpected...
  182. return nil, 0, 0, fmt.Errorf("label: %s in goroutine stack with unexpected number of values: %v", name, value)
  183. }
  184. stack.Labels = append(stack.Labels, &Label{Name: name, Value: value[0]})
  185. }
  186. // The number of goroutines that this sample represents is the `stack.Value[0]`
  187. stack.Count = sample.Value[0]
  188. goroutineCount += stack.Count
  189. // Now we want to associate this Stack with a Process.
  190. var process *Process
  191. // Try to get the PID from the goroutine labels
  192. if pidvalue, ok := sample.Label[PIDPProfLabel]; ok && len(pidvalue) == 1 {
  193. pid := IDType(pidvalue[0])
  194. // Now try to get the process from our map
  195. process, ok = processMap[pid]
  196. if !ok && pid != "" {
  197. // This means that no process has been found in the process map - but there was a process PID
  198. // Therefore this goroutine belongs to a dead process and it has escaped control of the process as it
  199. // should have died with the process context cancellation.
  200. // We need to create a dead process holder for this process and label it appropriately
  201. // get the parent PID
  202. ppid := IDType("")
  203. if value, ok := sample.Label[PPIDPProfLabel]; ok && len(value) == 1 {
  204. ppid = IDType(value[0])
  205. }
  206. // format the description
  207. description := "(dead process)"
  208. if value, ok := sample.Label[DescriptionPProfLabel]; ok && len(value) == 1 {
  209. description = value[0] + " " + description
  210. }
  211. // override the type of the process to "code" but add the old type as a label on the first stack
  212. ptype := NoneProcessType
  213. if value, ok := sample.Label[ProcessTypePProfLabel]; ok && len(value) == 1 {
  214. stack.Labels = append(stack.Labels, &Label{Name: ProcessTypePProfLabel, Value: value[0]})
  215. }
  216. process = &Process{
  217. PID: pid,
  218. ParentPID: ppid,
  219. Description: description,
  220. Type: ptype,
  221. }
  222. // Now add the dead process back to the map and tree so we don't go back through this again.
  223. processMap[process.PID] = process
  224. added := false
  225. if process.ParentPID != "" && !flat {
  226. if parent, ok := processMap[process.ParentPID]; ok {
  227. parent.Children = append(parent.Children, process)
  228. added = true
  229. }
  230. }
  231. if !added {
  232. processes = append(processes, process)
  233. }
  234. }
  235. }
  236. if process == nil {
  237. // This means that the sample we're looking has no PID label
  238. var ok bool
  239. process, ok = processMap[""]
  240. if !ok {
  241. // this is the first time we've come acrross an unassociated goroutine so create a "process" to hold them
  242. process = &Process{
  243. Description: "(unassociated)",
  244. Type: NoneProcessType,
  245. }
  246. processMap[process.PID] = process
  247. processes = append(processes, process)
  248. }
  249. }
  250. // The sample.Location represents a stack trace for this goroutine,
  251. // however each Location can represent multiple lines (mostly due to inlining)
  252. // so we need to walk the lines too
  253. for _, location := range sample.Location {
  254. for _, line := range location.Line {
  255. entry := &StackEntry{
  256. Function: line.Function.Name,
  257. File: line.Function.Filename,
  258. Line: int(line.Line),
  259. }
  260. stack.Entry = append(stack.Entry, entry)
  261. }
  262. }
  263. // Now we need a short-descriptive name to call the stack trace if when it is folded and
  264. // assuming the stack trace has some lines we'll choose the bottom of the stack (i.e. the
  265. // initial function that started the stack trace.) The top of the stack is unlikely to
  266. // be very helpful as a lot of the time it will be runtime.select or some other call into
  267. // a std library.
  268. stack.Description = "(unknown)"
  269. if len(stack.Entry) > 0 {
  270. stack.Description = stack.Entry[len(stack.Entry)-1].Function
  271. }
  272. process.Stacks = append(process.Stacks, stack)
  273. }
  274. // restrict to not show system processes
  275. if noSystem {
  276. for i := 0; i < len(processes); i++ {
  277. process := processes[i]
  278. if process.Type != SystemProcessType && process.Type != NoneProcessType {
  279. continue
  280. }
  281. processes[len(processes)-1], processes[i] = processes[i], processes[len(processes)-1]
  282. processes = append(processes[:len(processes)-1], process.Children...)
  283. i--
  284. }
  285. }
  286. // Now finally re-sort the processes. Newest process appears first
  287. after := func(processes []*Process) func(i, j int) bool {
  288. return func(i, j int) bool {
  289. left, right := processes[i], processes[j]
  290. return left.Start.After(right.Start)
  291. }
  292. }
  293. sort.Slice(processes, after(processes))
  294. if !flat {
  295. var sortChildren func(process *Process)
  296. sortChildren = func(process *Process) {
  297. sort.Slice(process.Children, after(process.Children))
  298. for _, child := range process.Children {
  299. sortChildren(child)
  300. }
  301. }
  302. }
  303. return processes, processCount, goroutineCount, err
  304. }