You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

russian.sbl 6.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  // Use { and } as the escape delimiters inside string literals, so that
  // '{a}' refers to the stringdef named a and '{U+0430}' to a code point.
  stringescapes {}

  /* ASCII names for the 33 Cyrillic letters, following the conventions of
   * the standard Library of Congress transliteration.  The definitions are
   * grouped by the role the letters play in this script rather than listed
   * in alphabetical order. */

  // The nine letters that make up the vowel grouping `v`.
  stringdef a     '{U+0430}'
  stringdef e     '{U+0435}'
  stringdef i     '{U+0438}'
  stringdef o     '{U+043E}'
  stringdef u     '{U+0443}'
  stringdef y     '{U+044B}'
  stringdef e`    '{U+044D}'
  stringdef iu    '{U+044E}'
  stringdef ia    '{U+044F}'

  // Not part of `v`: the stem routine rewrites it to {e} before anything else.
  stringdef e"    '{U+0451}'

  // Remaining letters that are not in the `v` grouping.
  stringdef b     '{U+0431}'
  stringdef v     '{U+0432}'
  stringdef g     '{U+0433}'
  stringdef d     '{U+0434}'
  stringdef zh    '{U+0436}'
  stringdef z     '{U+0437}'
  stringdef i`    '{U+0439}'
  stringdef k     '{U+043A}'
  stringdef l     '{U+043B}'
  stringdef m     '{U+043C}'
  stringdef n     '{U+043D}'
  stringdef p     '{U+043F}'
  stringdef r     '{U+0440}'
  stringdef s     '{U+0441}'
  stringdef t     '{U+0442}'
  stringdef f     '{U+0444}'
  stringdef kh    '{U+0445}'
  stringdef ts    '{U+0446}'
  stringdef ch    '{U+0447}'
  stringdef sh    '{U+0448}'
  stringdef shch  '{U+0449}'

  // The two sign letters, written as " and ' in the transliteration.
  stringdef "     '{U+044A}'
  stringdef '     '{U+044C}'
  // The single entry point exported to callers.
  externals ( stem )

  // Internal routines, all defined later in this file.
  routines (
      mark_regions
      R2
      perfective_gerund
      adjective
      adjectival
      reflexive
      verb
      noun
      derivational
      tidy_up
  )

  // Region marks; both are assigned in mark_regions.
  integers ( pV p2 )

  groupings ( v )

  // The vowel set used by mark_regions to locate pV and p2.
  define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
  /* Compute the two region marks used by the stemmer.
   *
   * Both marks start at `limit`, so they keep that value whenever the scan
   * below cannot find the letter pattern it is looking for.
   *   pV - position just after the first vowel.
   *   p2 - position just after the second vowel / non-vowel pair.
   */
  define mark_regions as (
      $p2 = limit
      $pV = limit
      do (
          // first vowel: pV sits immediately after it
          gopast v
          setmark pV
          gopast non-v

          // one more vowel followed by a non-vowel: p2 sits after that
          gopast v
          gopast non-v
          setmark p2
      )
  )
  59. backwardmode (
  // Succeeds only while the cursor has not moved in front of mark p2.
  define R2 as ( $p2 <= cursor )
  /* Strip a perfective gerund ending.
   *
   * The longest ending found at the cursor becomes the slice.  Endings of
   * the first group are deleted only when the letter in front of them is
   * {a} or {ia} (that letter itself is kept); endings of the second group
   * are deleted unconditionally.
   */
  define perfective_gerund as (
      [substring] among (
          // group 1: must follow {a} or {ia}
          '{v}{sh}{i}{s}{'}'
          '{v}{sh}{i}'
          '{v}'
              ( '{a}' or '{ia}' delete )

          // group 2: removed wherever they occur
          '{i}{v}{sh}{i}{s}{'}'
          '{y}{v}{sh}{i}{s}{'}'
          '{i}{v}{sh}{i}'
          '{y}{v}{sh}{i}'
          '{i}{v}'
          '{y}{v}'
              ( delete )
      )
  )
  /* Strip an adjective ending.  The longest listed ending found at the
   * cursor is deleted; the routine fails if none of them is present.
   * The endings are laid out by their final letter.
   */
  define adjective as (
      [substring] among (
          '{e}{e}'     '{i}{e}'      '{y}{e}'     '{o}{e}'
          '{i}{m}{i}'  '{y}{m}{i}'
          '{e}{i`}'    '{i}{i`}'     '{y}{i`}'    '{o}{i`}'
          '{e}{m}'     '{i}{m}'      '{y}{m}'     '{o}{m}'
          '{e}{g}{o}'  '{o}{g}{o}'
          '{e}{m}{u}'  '{o}{m}{u}'
          '{i}{kh}'    '{y}{kh}'
          '{a}{ia}'    '{ia}{ia}'
          '{u}{iu}'    '{iu}{iu}'
          '{o}{iu}'    // somewhat archaic
          '{e}{iu}'    // soft form of {o}{iu}
              ( delete )
      )
  )
  /* Strip an adjective ending and then, optionally, a participle suffix.
   *
   * `adjective` must succeed first.  The participle step is wrapped in
   * `try`, so the routine still succeeds when no participle suffix follows.
   *
   * Of the participle forms, em, vsh, ivsh, yvsh are readily removable.
   * nn, {iu}shch, shch, u{iu}shch can be removed with a small proportion of
   * errors.  Removing im, uem, enn creates too many errors, so
   * '{i}{m}', '{u}{e}{m}' (present passive participle) and '{e}{n}{n}'
   * (adjective from past passive participle) are deliberately not listed.
   */
  define adjectival as (
      adjective

      try (
          [substring] among (
              // these are deleted only when they follow {a} or {ia}
              '{e}{m}'          // present passive participle
              '{n}{n}'          // adjective from past passive participle
              '{v}{sh}'         // past active participle
              '{shch}'          // present active participle
              '{iu}{shch}'      // present active participle
                  ( '{a}' or '{ia}' delete )

              // these are deleted unconditionally
              '{i}{v}{sh}'      // past active participle
              '{y}{v}{sh}'      // past active participle
              '{u}{iu}{shch}'   // present active participle
                  ( delete )
          )
      )
  )
  // Strip a reflexive ending ({s}{ia} or {s}{'}); fails if neither is present.
  define reflexive as (
      [substring] among (
          '{s}{'}'
          '{s}{ia}'
              ( delete )
      )
  )
  /* Strip a verb ending.
   *
   * The longest listed ending found at the cursor becomes the slice.
   * Endings of the first group are deleted only when preceded by {a} or
   * {ia} (which is kept); endings of the second group are deleted
   * unconditionally.
   *
   * Note that the short passive participle endings are covered here:
   *   '{n}{a}' '{n}' '{n}{o}' '{n}{y}'                    (first group)
   *   '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'        (second group)
   */
  define verb as (
      [substring] among (
          // group 1: must follow {a} or {ia}
          '{l}'       '{l}{a}'      '{l}{i}'     '{l}{o}'
          '{n}'       '{n}{a}'      '{n}{o}'     '{n}{y}'     '{n}{n}{o}'
          '{e}{m}'    '{e}{t}'      '{e}{t}{e}'  '{e}{sh}{'}'
          '{i`}'      '{i`}{t}{e}'
          '{iu}{t}'   '{t}{'}'
              ( '{a}' or '{ia}' delete )

          // group 2: removed wherever they occur
          '{i}{l}'    '{i}{l}{a}'   '{i}{l}{i}'  '{i}{l}{o}'
          '{y}{l}'    '{y}{l}{a}'   '{y}{l}{i}'  '{y}{l}{o}'
          '{e}{n}'    '{e}{n}{a}'   '{e}{n}{o}'  '{e}{n}{y}'
          '{i}{m}'    '{y}{m}'
          '{i}{t}'    '{y}{t}'      '{ia}{t}'    '{u}{e}{t}'  '{u}{iu}{t}'
          '{i}{t}{e}' '{e}{i`}{t}{e}' '{u}{i`}{t}{e}'
          '{e}{i`}'   '{u}{i`}'
          '{i}{t}{'}' '{y}{t}{'}'   '{i}{sh}{'}'
          '{iu}'      '{u}{iu}'
              ( delete )
      )
  )
  /* Strip a noun ending.  The longest listed ending found at the cursor is
   * deleted; the routine fails if none of them is present.
   *
   * The small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}' '{e}{n}{a}'
   * '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}' is omitted -
   * they only occur on 12 words.
   */
  define noun as (
      [substring] among (
          // single-letter endings
          '{a}'  '{e}'  '{i}'  '{i`}'  '{o}'  '{u}'  '{y}'  '{'}'
          '{iu}' '{ia}'

          '{e}{v}'       '{o}{v}'
          '{i}{e}'       '{'}{e}'      '{e}{i}'        '{i}{i}'
          '{i}{e}{i`}'   '{e}{i`}'     '{o}{i`}'       '{i}{i`}'
          '{a}{m}'       '{ia}{m}'     '{i}{ia}{m}'
          '{e}{m}'       '{i}{e}{m}'   '{o}{m}'
          '{a}{m}{i}'    '{ia}{m}{i}'  '{i}{ia}{m}{i}'
          '{a}{kh}'      '{ia}{kh}'    '{i}{ia}{kh}'
          '{i}{iu}'      '{'}{iu}'     '{i}{ia}'       '{'}{ia}'
              ( delete )
      )
  )
  /* Strip a derivational ending ({o}{s}{t} or {o}{s}{t}{'}).  The R2 test
   * runs after the ending has been located, so the ending is removed only
   * when R2 holds at that point.
   */
  define derivational as (
      [substring] R2 among (
          '{o}{s}{t}{'}'
          '{o}{s}{t}'
              ( delete )
      )
  )
  /* Final clean-up of the stem.  Exactly one of three cases applies,
   * chosen by the longest ending found at the cursor:
   *   - a trailing soft sign is removed;
   *   - a trailing {n} is removed when another {n} precedes it
   *     (e.g. -nno endings), leaving a single {n};
   *   - a superlative ending is removed, after which a further {n} must be
   *     found, preceded by another {n}, and that first {n} is removed too.
   */
  define tidy_up as (
      [substring] among (
          '{'}'
              ( delete )              // with some slight false conflations

          '{n}'
              ( '{n}' delete )        // undouble a final -nn

          '{e}{i`}{sh}{e}'            // superlative forms
          '{e}{i`}{sh}'
              (
                  delete
                  [ '{n}' ] '{n}' delete
              )
      )
  )
  172. )
  /* Entry point: stem the current word in place.
   *
   * Steps, in order:
   *   1. replace every {e"} with {e};
   *   2. compute the marks pV and p2;
   *   3. working backwards from the end of the word, and never reaching in
   *      front of pV, remove one class of inflectional ending, then an
   *      optional final {i}, then a derivational ending, then tidy up.
   * Every step is wrapped in `do` or `try`, so a step that finds nothing
   * to remove does not stop the ones after it.
   */
  define stem as (

      // Normalise {e"} to {e}.  The documentation has long suggested that
      // the caller should do this beforehand - it is now done here instead.
      do repeat ( goto ( ['{e"}'] ) <- '{e}' )

      do mark_regions

      backwards setlimit tomark pV for (

          // Either a perfective gerund ending, or an optional reflexive
          // ending followed by the first of adjectival / verb / noun that
          // applies.
          do (
              perfective_gerund
              or
              (
                  try reflexive
                  adjectival or verb or noun
              )
          )

          // A final {i} may be left over, because the noun ending -i{iu}
          // is being treated as the verb ending -{iu}.
          try ( ['{i}'] delete )

          do derivational
          do tidy_up
      )
  )