You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

stem_Unicode.sbl 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  // Use { and } as the escape brackets inside string literals, so that
  // '{a}' refers to the stringdef named a.
  stringescapes {}

  /* Transliterated names for the 32 Cyrillic small letters
     U+0430 .. U+044F, given as hexadecimal code points. */
  stringdef a     hex '430'   // CYRILLIC SMALL LETTER A
  stringdef b     hex '431'   // CYRILLIC SMALL LETTER BE
  stringdef v     hex '432'   // CYRILLIC SMALL LETTER VE
  stringdef g     hex '433'   // CYRILLIC SMALL LETTER GHE
  stringdef d     hex '434'   // CYRILLIC SMALL LETTER DE
  stringdef e     hex '435'   // CYRILLIC SMALL LETTER IE
  stringdef zh    hex '436'   // CYRILLIC SMALL LETTER ZHE
  stringdef z     hex '437'   // CYRILLIC SMALL LETTER ZE
  stringdef i     hex '438'   // CYRILLIC SMALL LETTER I
  stringdef i`    hex '439'   // CYRILLIC SMALL LETTER SHORT I
  stringdef k     hex '43A'   // CYRILLIC SMALL LETTER KA
  stringdef l     hex '43B'   // CYRILLIC SMALL LETTER EL
  stringdef m     hex '43C'   // CYRILLIC SMALL LETTER EM
  stringdef n     hex '43D'   // CYRILLIC SMALL LETTER EN
  stringdef o     hex '43E'   // CYRILLIC SMALL LETTER O
  stringdef p     hex '43F'   // CYRILLIC SMALL LETTER PE
  stringdef r     hex '440'   // CYRILLIC SMALL LETTER ER
  stringdef s     hex '441'   // CYRILLIC SMALL LETTER ES
  stringdef t     hex '442'   // CYRILLIC SMALL LETTER TE
  stringdef u     hex '443'   // CYRILLIC SMALL LETTER U
  stringdef f     hex '444'   // CYRILLIC SMALL LETTER EF
  stringdef kh    hex '445'   // CYRILLIC SMALL LETTER HA
  stringdef ts    hex '446'   // CYRILLIC SMALL LETTER TSE
  stringdef ch    hex '447'   // CYRILLIC SMALL LETTER CHE
  stringdef sh    hex '448'   // CYRILLIC SMALL LETTER SHA
  stringdef shch  hex '449'   // CYRILLIC SMALL LETTER SHCHA
  stringdef "     hex '44A'   // CYRILLIC SMALL LETTER HARD SIGN
  stringdef y     hex '44B'   // CYRILLIC SMALL LETTER YERU
  stringdef '     hex '44C'   // CYRILLIC SMALL LETTER SOFT SIGN
  stringdef e`    hex '44D'   // CYRILLIC SMALL LETTER E
  stringdef iu    hex '44E'   // CYRILLIC SMALL LETTER YU
  stringdef ia    hex '44F'   // CYRILLIC SMALL LETTER YA
  // Internal routines; each one is defined further down in this file.
  routines (
      mark_regions        // computes the pV and p2 marks
      R2                  // test: cursor lies at or after p2
      perfective_gerund   // ending-removal routines, tried by stem
      adjective
      adjectival
      reflexive
      verb
      noun
      derivational
      tidy_up
  )

  // The only routine exported to callers.
  externals ( stem )

  // Region marks set by mark_regions.
  integers ( pV p2 )

  groupings ( v )

  // The vowel grouping: the nine letters treated as vowels.
  define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
  /* Compute the two region marks used by the stemmer.
     Both marks start at limit and keep that value if the scan below
     fails part-way (the scan is wrapped in `do`, so a failure is ignored
     and the cursor is restored).
       pV : position just after the first vowel.
       p2 : position just after the second vowel/non-vowel pair. */
  define mark_regions as (
      $pV = limit
      $p2 = limit
      do (
          gopast v
          setmark pV
          gopast non-v
          gopast v
          gopast non-v
          setmark p2
      )
  )
  57. backwardmode (
  // Succeeds when the cursor is at or after the p2 mark set by mark_regions.
  define R2 as ( $p2 <= cursor )
  /* Remove a perfective gerund ending.
     The longest matching ending is bracketed as the slice.  Endings in
     the first group are deleted only when preceded by {a} or {ia};
     endings in the second group are deleted unconditionally. */
  define perfective_gerund as (
      [substring] among (

          // group 1: must follow {a} or {ia}
          '{v}'
          '{v}{sh}{i}'
          '{v}{sh}{i}{s}{'}'
              ( ('{a}' or '{ia}') delete )

          // group 2: removed wherever they match
          '{i}{v}'   '{i}{v}{sh}{i}'   '{i}{v}{sh}{i}{s}{'}'
          '{y}{v}'   '{y}{v}{sh}{i}'   '{y}{v}{sh}{i}{s}{'}'
              ( delete )
      )
  )
  /* Remove an adjective ending.
     The longest ending from the list below that matches is deleted. */
  define adjective as (
      [substring] among (
          '{e}{e}'     '{i}{e}'     '{y}{e}'     '{o}{e}'
          '{i}{m}{i}'  '{y}{m}{i}'
          '{e}{i`}'    '{i}{i`}'    '{y}{i`}'    '{o}{i`}'
          '{e}{m}'     '{i}{m}'     '{y}{m}'     '{o}{m}'
          '{e}{g}{o}'  '{o}{g}{o}'
          '{e}{m}{u}'  '{o}{m}{u}'
          '{i}{kh}'    '{y}{kh}'
          '{u}{iu}'    '{iu}{iu}'
          '{a}{ia}'    '{ia}{ia}'
          // and two further endings:
          '{o}{iu}'    // somewhat archaic
          '{e}{iu}'    // soft form of {o}{iu}
              ( delete )
      )
  )
  /* Remove an adjectival ending: an adjective ending (required),
     optionally preceded by a participle ending.

     Of the participle forms, em, vsh, ivsh, yvsh are readily removable.
     nn, {iu}shch, shch, u{iu}shch can be removed with a small proportion
     of errors.  Removing im, uem, enn creates too many errors, so
     '{i}{m}', '{u}{e}{m}' (present passive participle) and '{e}{n}{n}'
     (adjective from past passive participle) are deliberately left out. */
  define adjectival as (
      adjective
      try (
          [substring] among (

              // removed only when preceded by {a} or {ia}
              '{e}{m}'                // present passive participle
              '{n}{n}'                // adjective from past passive participle
              '{v}{sh}'               // past active participle
              '{iu}{shch}' '{shch}'   // present active participle
                  ( ('{a}' or '{ia}') delete )

              // removed unconditionally
              '{i}{v}{sh}' '{y}{v}{sh}'   // past active participle
              '{u}{iu}{shch}'             // present active participle
                  ( delete )
          )
      )
  )
  // Remove a reflexive ending: either {s}{ia} or {s} followed by the soft sign.
  define reflexive as (
      [substring] among (
          '{s}{ia}'  '{s}{'}'
              ( delete )
      )
  )
  /* Remove a verb ending.
     Endings in the first group are deleted only when preceded by {a} or
     {ia}; endings in the second group are deleted unconditionally.

     Note that the short passive participle endings are covered here:
         '{n}{a}'    '{n}'    '{n}{o}'    '{n}{y}'      (first group)
         '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'   (second group) */
  define verb as (
      [substring] among (

          // group 1: must follow {a} or {ia}
          '{l}{a}'   '{n}{a}'   '{e}{t}{e}'   '{i`}{t}{e}'
          '{l}{i}'   '{i`}'     '{l}'         '{e}{m}'
          '{n}'      '{l}{o}'   '{n}{o}'      '{e}{t}'
          '{iu}{t}'  '{n}{y}'   '{t}{'}'      '{e}{sh}{'}'
          '{n}{n}{o}'
              ( ('{a}' or '{ia}') delete )

          // group 2: removed wherever they match
          '{i}{l}{a}'      '{y}{l}{a}'   '{e}{n}{a}'   '{e}{i`}{t}{e}'
          '{u}{i`}{t}{e}'  '{i}{t}{e}'   '{i}{l}{i}'   '{y}{l}{i}'
          '{e}{i`}'        '{u}{i`}'     '{i}{l}'      '{y}{l}'
          '{i}{m}'         '{y}{m}'      '{e}{n}'      '{i}{l}{o}'
          '{y}{l}{o}'      '{e}{n}{o}'   '{ia}{t}'     '{u}{e}{t}'
          '{u}{iu}{t}'     '{i}{t}'      '{y}{t}'      '{e}{n}{y}'
          '{i}{t}{'}'      '{y}{t}{'}'   '{i}{sh}{'}'  '{u}{iu}'
          '{iu}'
              ( delete )
      )
  )
  /* Remove a noun ending.
     The longest ending from the list below that matches is deleted.

     The small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
     '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
     is omitted - they only occur on 12 words. */
  define noun as (
      [substring] among (
          '{a}'
          '{e}{v}'  '{o}{v}'
          '{i}{e}'  '{'}{e}'  '{e}'
          '{i}{ia}{m}{i}'  '{ia}{m}{i}'  '{a}{m}{i}'
          '{e}{i}'  '{i}{i}'  '{i}'
          '{i}{e}{i`}'  '{e}{i`}'  '{o}{i`}'  '{i}{i`}'  '{i`}'
          '{i}{ia}{m}'  '{ia}{m}'
          '{i}{e}{m}'   '{e}{m}'  '{a}{m}'  '{o}{m}'
          '{o}'  '{u}'
          '{a}{kh}'  '{i}{ia}{kh}'  '{ia}{kh}'
          '{y}'  '{'}'
          '{i}{iu}'  '{'}{iu}'  '{iu}'
          '{i}{ia}'  '{'}{ia}'  '{ia}'
              ( delete )
      )
  )
  /* Remove a derivational ending, {o}{s}{t} optionally followed by the
     soft sign, but only when the R2 test succeeds after the match. */
  define derivational as (
      [substring] R2 among (
          '{o}{s}{t}'  '{o}{s}{t}{'}'
              ( delete )
      )
  )
  /* Final clean-up of the stem; the action depends on which ending matched:
       - superlative ending: delete it, then, if what remains ends in a
         double {n}, delete the last {n};
       - a single {n}: delete it only if another {n} precedes it;
       - the soft sign: delete it. */
  define tidy_up as (
      [substring] among (

          '{e}{i`}{sh}'  '{e}{i`}{sh}{e}'   // superlative forms
              (
                  delete
                  ['{n}'] '{n}' delete
              )

          '{n}'
              ( '{n}' delete )   // e.g. -nno endings

          '{'}'
              ( delete )         // with some slight false conflations
      )
  )
  170. )
  /* Entry point: stem the current word in place.
     The region marks are computed first.  All ending removal then runs
     backwards from the end of the word, limited to the part of the word
     after the pV mark. */
  define stem as (
      do mark_regions

      backwards setlimit tomark pV for (

          // Either a perfective gerund ending, or else an optional
          // reflexive ending followed by the first of the adjectival,
          // verb or noun endings that applies.
          do (
              perfective_gerund or
              (
                  try reflexive
                  ( adjectival or verb or noun )
              )
          )

          // Drop a trailing {i}, because noun ending -i{iu} is being
          // treated as verb ending -{iu}.
          try ( ['{i}'] delete )

          do derivational
          do tidy_up
      )
  )