You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

diff.rb 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  # Computes the differences between two indexable sequences using a
  # longest-common-subsequence match.
  #
  # The result is exposed through +diffs+: a list of hunks, each hunk being a
  # list of modifications of the form [action, index, element]. The action
  # '-' marks an element of the first sequence (at +index+ in it) that is not
  # matched; '+' marks an element of the second sequence (at +index+ in it)
  # that is not matched.
  #
  # The sequences must respond to +length+, +[]+, +reverse_hash+ and the
  # arrays used internally must respond to +replacenextlarger+.
  class Diff
    VERSION = 0.3

    attr_reader :diffs, :difftype

    # Returns a match vector for +a+ and +b+: an array in which entry +i+ is
    # the index in +b+ matched with a[i], or nil when a[i] has no match.
    # Trailing unmatched entries may be absent, so the vector can be shorter
    # than +a+.
    def self.lcs(a, b)
      astart = 0
      bstart = 0
      afinish = a.length - 1
      bfinish = b.length - 1
      mvector = []

      # Prune the common prefix. bstart is bounded by bfinish (not afinish)
      # so that b is never read past its end; an out-of-range read yields nil
      # and would compare equal to a nil element of a.
      while astart <= afinish && bstart <= bfinish && a[astart] == b[bstart]
        mvector[astart] = bstart
        astart += 1
        bstart += 1
      end

      # Prune the common suffix.
      while astart <= afinish && bstart <= bfinish && a[afinish] == b[bfinish]
        mvector[afinish] = bfinish
        afinish -= 1
        bfinish -= 1
      end

      # Map each remaining element of b to the indices where it occurs.
      bmatches = b.reverse_hash(bstart..bfinish)
      thresh = []
      links = []

      (astart..afinish).each do |aindex|
        aelem = a[aindex]
        next unless bmatches.key?(aelem)

        k = nil
        # Candidate positions in b are visited from the highest index down.
        bmatches[aelem].reverse_each do |bindex|
          if k && thresh[k] > bindex && thresh[k - 1] < bindex
            thresh[k] = bindex
          else
            k = thresh.replacenextlarger(bindex, k)
          end
          # Each link points back to the previous link of the chain.
          links[k] = [k == 0 ? nil : links[k - 1], aindex, bindex] if k
        end
      end

      # Walk the longest chain backwards and record its matches.
      unless thresh.empty?
        link = links[thresh.length - 1]
        while link
          mvector[link[1]] = link[2]
          link = link[0]
        end
      end

      mvector
    end

    # With only +diffs_or_a+ given, wraps an existing hunk list; +isstring+
    # then selects string accumulation in #compactdiffs. With +b+ given,
    # computes the differences between +diffs_or_a+ and +b+ and records the
    # class of +diffs_or_a+ as +difftype+.
    def initialize(diffs_or_a, b = nil, isstring = nil)
      if b.nil?
        @diffs = diffs_or_a
        @isstring = isstring
      else
        @diffs = []
        @curdiffs = []
        makediff(diffs_or_a, b)
        @difftype = diffs_or_a.class
      end
    end

    # Walks the match vector of +a+ and +b+, reporting every unmatched
    # element through #discarda / #discardb and every match through #match.
    # Returns 1.
    def makediff(a, b)
      mvector = Diff.lcs(a, b)
      ai = bi = 0

      while ai < mvector.length
        bline = mvector[ai]
        if bline
          # Everything in b before the matched position is an addition.
          while bi < bline
            discardb(bi, b[bi])
            bi += 1
          end
          match(ai, bi)
          bi += 1
        else
          discarda(ai, a[ai])
        end
        ai += 1
      end

      # Elements of a beyond the end of the match vector are unmatched.
      while ai < a.length
        discarda(ai, a[ai])
        ai += 1
      end

      while bi < b.length
        discardb(bi, b[bi])
        bi += 1
      end

      # Flush the modifications collected after the last match.
      match(ai, bi)
      1
    end

    # Returns a new hunk list in which, inside each hunk, runs of
    # modifications with the same action and consecutive indices are merged
    # into one [action, first_index, elements] entry. The elements are
    # collected in a string when the diff was created with +isstring+ set,
    # otherwise in an array.
    def compactdiffs
      diffs = []
      @diffs.each do |df|
        i = 0
        curdiff = []
        while i < df.length
          whot = df[i][0]
          s = @isstring ? df[i][2].chr : [df[i][2]]
          pos = df[i][1]
          last = df[i][1]
          i += 1
          while df[i] && df[i][0] == whot && df[i][1] == last + 1
            s << df[i][2]
            last = df[i][1]
            i += 1
          end
          curdiff.push [whot, pos, s]
        end
        diffs.push curdiff
      end
      diffs
    end

    # Called on every match: closes the hunk being collected, if any.
    def match(ai, bi)
      @diffs.push @curdiffs unless @curdiffs.empty?
      @curdiffs = []
    end

    # Records that +elem+, at index +i+ of the first sequence, is unmatched.
    def discarda(i, elem)
      @curdiffs.push ['-', i, elem]
    end

    # Records that +elem+, at index +i+ of the second sequence, is unmatched.
    def discardb(i, elem)
      @curdiffs.push ['+', i, elem]
    end

    # Returns a new Diff wrapping the compacted hunk list. The new object is
    # built from the hunk list alone, so its +difftype+ is nil.
    def compact
      Diff.new(compactdiffs)
    end

    # Replaces this object's hunk list with its compacted form.
    def compact!
      @diffs = compactdiffs
    end

    def inspect
      @diffs.inspect
    end
  end
  # Mixin that provides diffing and patching for indexable sequences.
  #
  # The including class is expected to respond to +length+ and +[]+; the
  # object built by #patch must respond to +<<+. #replacenextlarger
  # additionally uses +empty?+, +push+ and +[]=+.
  module Diffable
    # Returns a Diff describing the differences between self and +b+.
    def diff(b)
      Diff.new(self, b)
    end

    # Create a hash that maps elements of the sequence to arrays of indices
    # where the elements are found. Only indices in +range+ are considered
    # (the whole sequence by default); indices are listed in the order the
    # range yields them.
    def reverse_hash(range = (0...length))
      range.each_with_object({}) do |i, revmap|
        (revmap[self[i]] ||= []) << i
      end
    end

    # Inserts +value+ into this ascending list of values.
    #
    # When the list is empty or +value+ is larger than the last entry,
    # +value+ is appended and +high+ is returned (the previous length when
    # +high+ was not given). Otherwise a binary search over the indices below
    # +high+ locates the first entry not smaller than +value+; if that entry
    # equals +value+ nothing is changed and nil is returned, else the entry
    # is overwritten with +value+ and its index is returned.
    def replacenextlarger(value, high = nil)
      high ||= length
      if empty? || value > self[-1]
        push value
        return high
      end

      # binary search for replacement point
      low = 0
      while low < high
        index = (high + low) / 2
        found = self[index]
        return nil if value == found

        if value > found
          low = index + 1
        else
          high = index
        end
      end

      self[low] = value
      low
    end

    # Applies +diff+ to self and returns the result as a new object of class
    # diff.difftype; self is not modified. Each modification in diff.diffs is
    # read as [action, index, element].
    #
    # Raises RuntimeError when a modification has an action other than '-'
    # or '+'.
    def patch(diff)
      newary = if diff.difftype == String
                 diff.difftype.new('')
               else
                 diff.difftype.new
               end
      ai = 0 # read position in self
      bi = 0 # position reached in the sequence being produced

      diff.diffs.each do |d|
        d.each do |mod|
          case mod[0]
          when '-'
            # Copy everything before the removed element, then skip it.
            while ai < mod[1]
              newary << self[ai]
              ai += 1
              bi += 1
            end
            ai += 1
          when '+'
            # Copy until the insertion position is reached, then insert.
            while bi < mod[1]
              newary << self[ai]
              ai += 1
              bi += 1
            end
            newary << mod[2]
            bi += 1
          else
            raise "Unknown diff action"
          end
        end
      end

      # Copy whatever remains after the last modification.
      while ai < length
        newary << self[ai]
        ai += 1
      end

      newary
    end
  end
  # Arrays gain diff, patch and the Diffable helper methods.
  class Array
    include Diffable
  end
  # Strings gain diff, patch and the Diffable helper methods.
  class String
    include Diffable
  end
=begin
= Diff
(({diff.rb})) - computes the differences between two arrays or
strings. Copyright (C) 2001 Lars Christensen

== Synopsis

    diff = Diff.new(a, b)
    b = a.patch(diff)

== Class Diff
=== Class Methods
--- Diff.new(a, b)
--- a.diff(b)
      Creates a Diff object which represents the differences between
      ((|a|)) and ((|b|)). ((|a|)) and ((|b|)) can each be an array
      of any objects, a string, or an object of any class that includes
      module ((|Diffable|)).

== Module Diffable
The module ((|Diffable|)) is intended to be included in any class for
which differences are to be computed. Diffable is included into String
and Array when (({diff.rb})) is (({require}))'d.

Classes including Diffable should implement (({[]})) to get the element at
an integer index and (({<<})) to append elements to the object, and
(({ClassName#new})) should accept 0 arguments to create a new empty
object.

=== Instance Methods
--- Diffable#patch(diff)
      Applies the differences from ((|diff|)) to the object ((|obj|))
      and returns the result. ((|obj|)) is not changed. ((|obj|))
      can be either an array or a string, but must match the object
      from which the ((|diff|)) was created.
=end