You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

stem_ISO_8859_1.sbl 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  // Declarations for the stemmer: variables, routine names, the external entry
  // point, character groupings and string escapes.
  // NOTE(review): the leading "N." on each line looks like listing line numbers
  // carried over from a web code viewer rather than Snowball syntax - confirm
  // and strip before compiling.
  // ch: string buffer used by lengthen_V (->ch insert ch).
  1. strings ( ch )
  // x: scratch cursor position for R1/R2; p1, p2: region marks set by measure.
  2. integers ( x p1 p2 )
  // Y_found: set in stem when a 'y' is rewritten to 'Y'.
  // stemmed: set in stem when one of Step_1..Step_4 or Step_7 succeeds.
  // GE_removed: set by Lose_prefix / Lose_infix when a 'ge' is deleted.
  3. booleans ( Y_found stemmed GE_removed )
  // Forward declarations of every internal routine defined below.
  4. routines (
  5. R1 R2
  6. C V VX
  7. lengthen_V
  8. Step_1 Step_2 Step_3 Step_4 Step_7
  9. Step_6 Step_1c
  10. Lose_prefix
  11. Lose_infix
  12. measure
  13. )
  // stem is the only routine exported to callers.
  14. externals ( stem )
  15. groupings ( v v_WX AOU AIOU )
  // Inside string literals, {name} expands a stringdef.
  16. stringescapes {}
  // Defines ' as hex 27 (the apostrophe), so {'} can be written inside strings.
  17. stringdef ' hex '27' // yuk
  // v: the vowels, including 'y'.
  18. define v 'aeiouy'
  // v_WX: the vowels plus 'w' and 'x'.
  19. define v_WX v + 'wx'
  20. define AOU 'aou'
  21. define AIOU 'aiou'
  // Routines defined inside backwardmode ( ... ) operate scanning from the end
  // of the word towards its start; stem calls the Step_* routines from its
  // backwards ( ... ) sections.
  22. backwardmode (
  // R1 / R2: store the current cursor in x and succeed only when it is at or
  // beyond the mark p1 / p2 set by measure.
  23. define R1 as (setmark x $x >= p1)
  24. define R2 as (setmark x $x >= p2)
  // V: look-ahead (cursor unchanged) for a vowel or 'ij' at the cursor.
  25. define V as test (v or 'ij')
  // VX: like V, but first steps over one character with next.
  26. define VX as test (next v or 'ij')
  // C: look-ahead (cursor unchanged) that 'ij' does not match here and that the
  // character at the cursor is a non-vowel.
  27. define C as test (not 'ij' non-v)
  // lengthen_V: wrapped in do, so it never signals failure to its caller.
  // It first matches a character outside v_WX, then marks as the slice either
  // one of 'a', 'o', 'u' or an 'e' (each subject to the tests shown), copies
  // the slice into ch and inserts ch - in effect duplicating the marked vowel.
  28. define lengthen_V as do (
  29. non-v_WX [ (AOU] test (non-v or atlimit)) or
  30. ('e'] test (non-v or atlimit
  31. not AIOU
  32. not (next AIOU non-v)))
  33. ->ch insert ch
  34. )
  // Step_1: endings 's, s, ies, es, aus, en, nde.
  // In each among below, the leading (]) is the common starter command: it
  // closes the slice on the matched ending before the selected action runs.
  // A further ] inside an action re-marks the slice after more characters have
  // been matched, so the following delete / <- covers those characters too.
  35. define Step_1 as
  36. (
  37. [among ( (])
  // {'} is the apostrophe stringdef, so this entry matches "'s".
  38. '{'}s' (delete)
  39. 's' (R1 not ('t' R1) C delete)
  40. 'ies' (R1 <-'ie')
  41. 'es'
  42. (('ar' R1 C ] delete lengthen_V) or
  43. ('er' R1 C ] delete) or
  44. (R1 C <-'e'))
  45. 'aus' (R1 V <-'au')
  // 'en': alternatives are tried in order; the first that succeeds wins.
  46. 'en' (('hed' R1 ] <-'heid') or
  47. ('nd' delete) or
  48. ('d' R1 C ] delete) or
  49. ('i' or 'j' V delete) or
  50. (R1 C delete lengthen_V))
  51. 'nde' (<-'nd')
  52. )
  53. )
  // Step_2: endings je, ge, lijke, ische, de, te, se, re, le, ene, ieve.
  54. define Step_2 as
  55. (
  56. [among ( (])
  // 'je': the slice is re-marked with ] after the extra context is matched;
  // the last alternative re-marks it on 'je' alone.
  57. 'je' (('{'}t' ] delete) or
  58. ('et' ] R1 C delete) or
  59. ('rnt' ] <-'rn') or
  60. ('t' ] R1 VX delete) or
  61. ('ink' ] <-'ing') or
  62. ('mp' ] <-'m') or
  63. ('{'}' ] R1 delete) or
  64. (] R1 C delete))
  65. 'ge' (R1 <-'g')
  66. 'lijke'(R1 <-'lijk')
  67. 'ische'(R1 <-'isch')
  68. 'de' (R1 C delete)
  69. 'te' (R1 <-'t')
  70. 'se' (R1 <-'s')
  71. 're' (R1 <-'r')
  // 'le' / 'ene': delete the ending, re-attach 'l' / 'en', then try lengthen_V.
  72. 'le' (R1 delete attach 'l' lengthen_V)
  73. 'ene' (R1 C delete attach 'en' lengthen_V)
  74. 'ieve' (R1 C <-'ief')
  75. )
  76. )
  // Step_3: derivational endings. Strings listed without an action of their
  // own share the action of the next string that has one.
  77. define Step_3 as
  78. (
  79. [among ( (])
  80. 'atie' (R1 <-'eer')
  81. 'iteit' (R1 delete lengthen_V)
  // 'heid', 'sel' and 'ster' share (R1 delete).
  82. 'heid'
  83. 'sel'
  84. 'ster' (R1 delete)
  85. 'rder' (<-'r')
  // 'ing', 'isme' and 'erij' share (R1 delete lengthen_V).
  86. 'ing'
  87. 'isme'
  88. 'erij' (R1 delete lengthen_V)
  89. 'arij' (R1 C <-'aar')
  // 'fie' / 'gie' are the only entries that test R2 instead of R1.
  90. 'fie' (R2 delete attach 'f' lengthen_V)
  91. 'gie' (R2 delete attach 'g' lengthen_V)
  92. 'tst' (R1 C <-'t')
  93. 'dst' (R1 C <-'d')
  94. )
  95. )
  // Step_4: two among tables joined by or; the second ('iger', 'igst', 'ig')
  // is tried only when the first one fails.
  96. define Step_4 as
  97. (
  98. ( [among ( (])
  99. 'ioneel' (R1 <-'ie')
  100. 'atief' (R1 <-'eer')
  101. 'baar' (R1 delete)
  102. 'naar' (R1 V <-'n')
  103. 'laar' (R1 V <-'l')
  104. 'raar' (R1 V <-'r')
  105. 'tant' (R1 <-'teer')
  // 'lijker' and 'lijkst' share (R1 <-'lijk').
  106. 'lijker'
  107. 'lijkst' (R1 <-'lijk')
  // 'achtig', 'achtiger' and 'achtigst' share (R1 delete).
  108. 'achtig'
  109. 'achtiger'
  110. 'achtigst'(R1 delete)
  // 'eriger', 'erigst', 'erig' and 'end' share (R1 C delete lengthen_V).
  111. 'eriger'
  112. 'erigst'
  113. 'erig'
  114. 'end' (R1 C delete lengthen_V)
  115. )
  116. )
  117. or
  118. ( [among ( (])
  119. 'iger'
  120. 'igst'
  121. 'ig' (R1 C delete lengthen_V)
  122. )
  123. )
  124. )
  // Step_7: replace final 'kt', 'ft', 'pt' by 'k', 'f', 'p'. No R1 test.
  125. define Step_7 as
  126. (
  127. [among ( (])
  128. 'kt' (<-'k')
  129. 'ft' (<-'f')
  130. 'pt' (<-'p')
  131. )
  132. )
  // Step_6: reduce a final doubled consonant to a single one, and rewrite a
  // final 'v' to 'f' and a final 'z' to 's'. No R1 test.
  133. define Step_6 as
  134. (
  135. [among ( (])
  136. 'bb' (<-'b')
  137. 'cc' (<-'c')
  138. 'dd' (<-'d')
  139. 'ff' (<-'f')
  140. 'gg' (<-'g')
  141. 'hh' (<-'h')
  142. 'jj' (<-'j')
  143. 'kk' (<-'k')
  144. 'll' (<-'l')
  145. 'mm' (<-'m')
  146. 'nn' (<-'n')
  147. 'pp' (<-'p')
  148. 'qq' (<-'q')
  149. 'rr' (<-'r')
  150. 'ss' (<-'s')
  151. 'tt' (<-'t')
  152. 'vv' (<-'v')
  153. 'ww' (<-'w')
  154. 'xx' (<-'x')
  155. 'zz' (<-'z')
  156. 'v' (<-'f')
  157. 'z' (<-'s')
  158. )
  159. )
  // Step_1c: delete a final 'd' or 't'. The common starter (] R1 C) closes the
  // slice and requires R1 and C for both entries; the deletion is skipped when
  // the 'd' follows an 'n' (or the 't' follows an 'h') that itself passes R1.
  160. define Step_1c as
  161. (
  162. [among ( (] R1 C)
  163. 'd' (not ('n' R1) delete)
  164. 't' (not ('h' R1) delete)
  165. )
  166. )
  167. )
  // Lose_prefix: remove a 'ge' found at the cursor (stem calls this at the
  // start of the word). The 'ge' is marked as the slice; it is deleted only if
  // at least 3 characters follow it (test hop 3 leaves the cursor in place)
  // and a vowel and then a non-vowel can be found further on. GE_removed is
  // set just before the deletion. Fails without changes otherwise.
  168. define Lose_prefix as (
  169. ['ge'] test hop 3 (goto v goto non-v)
  170. set GE_removed
  171. delete
  172. )
  // Lose_infix: remove a 'ge' occurring after the first character. next skips
  // one character, gopast then searches forward for 'ge' and leaves it marked
  // as the slice. The same conditions as in Lose_prefix apply: at least 3
  // characters must follow, and a vowel and then a non-vowel must be found.
  // On success GE_removed is set and the marked 'ge' is deleted.
  173. define Lose_infix as (
  174. next
  175. gopast (['ge']) test hop 3 (goto v goto non-v)
  176. set GE_removed
  177. delete
  178. )
  // measure: compute the region marks p1 and p2 used by R1 and R2.
  179. define measure as (
  // Default: both marks are placed at the limit (end) of the word.
  180. do (
  181. tolimit
  182. setmark p1
  183. setmark p2
  184. )
  // Then, from the start: skip non-vowels, require at least one vowel or 'ij',
  // then one non-vowel, and mark p1 there; repeat the same pattern from p1 to
  // mark p2. If either pattern fails, the enclosing do absorbs the failure and
  // the marks not yet reassigned keep their end-of-word default.
  185. do(
  186. repeat non-v atleast 1 ('ij' or v) non-v setmark p1
  187. repeat non-v atleast 1 ('ij' or v) non-v setmark p2
  188. )
  189. )
  // stem: the external entry point. Protects consonantal 'y' as 'Y', computes
  // p1/p2, applies the suffix steps, removes a 'ge' prefix or infix, applies
  // the clean-up steps and finally restores 'Y' to 'y'. Every stage is wrapped
  // in do, so a stage that fails leaves the word as it was and processing
  // continues.
  190. define stem as (
  191. unset Y_found
  192. unset stemmed
  // A 'y' at the very start of the word becomes 'Y'.
  193. do ( ['y'] <-'Y' set Y_found )
  // Every 'y' that directly follows a vowel becomes 'Y'.
  194. do repeat(goto (v ['y'])<-'Y' set Y_found )
  195. measure
  // Suffix steps, scanning from the end; stemmed records whether any succeeded.
  196. backwards (
  197. do (Step_1 set stemmed )
  198. do (Step_2 set stemmed )
  199. do (Step_3 set stemmed )
  200. do (Step_4 set stemmed )
  201. )
  // If a 'ge' prefix is removed, re-run measure so p1/p2 match the shorter
  // word, then apply Step_1c (only when GE_removed is set).
  202. unset GE_removed
  203. do (Lose_prefix and measure)
  204. backwards (
  205. do (GE_removed Step_1c)
  206. )
  // Same sequence for a 'ge' infix.
  207. unset GE_removed
  208. do (Lose_infix and measure)
  209. backwards (
  210. do (GE_removed Step_1c)
  211. )
  // Final clean-up: Step_7 always runs; Step_6 runs only if an earlier step
  // set stemmed or the infix pass set GE_removed.
  212. backwards (
  213. do (Step_7 set stemmed )
  214. do (stemmed or GE_removed Step_6)
  215. )
  // Turn every protective 'Y' back into 'y' if any was introduced.
  216. do(Y_found repeat(goto (['Y']) <-'y'))
  217. )