Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

french.sbl 6.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. routines ( // internal routines; each one is defined further down in this file
  2. prelude postlude mark_regions // forward-mode passes that stem runs outside its backwards section
  3. RV R1 R2 // region tests comparing the cursor with pV, p1 and p2
  4. standard_suffix
  5. i_verb_suffix
  6. verb_suffix
  7. residual_suffix
  8. un_double
  9. un_accent
  10. )
  11. externals ( stem ) // stem is the only routine declared external
  12. integers ( pV p1 p2 ) // positions assigned in mark_regions
  13. groupings ( v keep_with_s ) // character sets defined with 'define' below
  14. stringescapes {} // the characters { and } delimit escapes inside string literals
  15. /* special characters: named escapes for the accented letters used below */
  16. stringdef a^ '{U+00E2}' // a-circumflex
  17. stringdef a` '{U+00E0}' // a-grave
  18. stringdef c, '{U+00E7}' // c-cedilla
  19. stringdef e" '{U+00EB}' // e-diaeresis (rare)
  20. stringdef e' '{U+00E9}' // e-acute
  21. stringdef e^ '{U+00EA}' // e-circumflex
  22. stringdef e` '{U+00E8}' // e-grave
  23. stringdef i" '{U+00EF}' // i-diaeresis
  24. stringdef i^ '{U+00EE}' // i-circumflex
  25. stringdef o^ '{U+00F4}' // o-circumflex
  26. stringdef u^ '{U+00FB}' // u-circumflex
  27. stringdef u` '{U+00F9}' // u-grave
  28. define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}' // a, e, i, o, u, y plus every accented vowel defined above (c-cedilla is not included)
  29. define prelude as repeat goto ( // rewrite selected letters as the markers U, I, Y, He, Hi; postlude maps them back
  30. ( v [ ('u' ] v <- 'U') or // after a vowel: u followed by a vowel becomes U
  31. ('i' ] v <- 'I') or // after a vowel: i followed by a vowel becomes I
  32. ('y' ] <- 'Y') // after a vowel: y becomes Y
  33. )
  34. or
  35. ( [ '{e"}' ] <- 'He' ) // e-diaeresis becomes He
  36. or
  37. ( [ '{i"}' ] <- 'Hi' ) // i-diaeresis becomes Hi
  38. or
  39. ( ['y'] v <- 'Y' ) // y followed by a vowel becomes Y
  40. or
  41. ( 'q' ['u'] <- 'U' ) // u directly after q becomes U
  42. )
  43. define mark_regions as ( // assign pV, p1 and p2, the positions that RV, R1 and R2 compare the cursor against
  44. $pV = limit
  45. $p1 = limit
  46. $p2 = limit // defaults
  47. do ( // locate pV; if none of the alternatives matches, pV keeps its default
  48. ( v v next ) // two vowels at the cursor, then one more character
  49. or
  50. among ( // this exception list begun Nov 2006
  51. 'par' // paris, parie, pari
  52. 'col' // colis
  53. 'tap' // tapis
  54. // extensions possible here
  55. )
  56. or
  57. ( next gopast v ) // otherwise skip one character, then move past the next vowel
  58. setmark pV
  59. )
  60. do ( // locate p1 and then p2; a mark whose pattern is not found keeps its default
  61. gopast v gopast non-v setmark p1 // p1: just past the first non-vowel that follows a vowel
  62. gopast v gopast non-v setmark p2 // p2: the same pattern applied again, starting from p1
  63. )
  64. )
  65. define postlude as repeat ( // walk the word and turn the prelude markers back into ordinary letters
  66. [substring] among(
  67. 'I' (<- 'i')
  68. 'U' (<- 'u')
  69. 'Y' (<- 'y')
  70. 'He' (<- '{e"}') // restore e-diaeresis
  71. 'Hi' (<- '{i"}') // restore i-diaeresis
  72. 'H' (delete) // an H not matched as He or Hi is removed
  73. '' (next) // nothing to rewrite here: advance one character
  74. )
  75. )
  76. backwardmode (
  77. define RV as $pV <= cursor // succeeds when pV <= cursor
  78. define R1 as $p1 <= cursor // succeeds when p1 <= cursor
  79. define R2 as $p2 <= cursor // succeeds when p2 <= cursor
  80. define standard_suffix as ( // match one suffix from the list below and run the action attached to its group
  81. [substring] among(
  82. 'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
  83. 'ances' 'iqUes' 'ismes' 'ables' 'istes'
  84. ( R2 delete ) // delete if in R2
  85. 'atrice' 'ateur' 'ation'
  86. 'atrices' 'ateurs' 'ations'
  87. ( R2 delete // delete if in R2
  88. try ( ['ic'] (R2 delete) or <-'iqU' ) // a preceding 'ic' is deleted if in R2, otherwise replaced by 'iqU'
  89. )
  90. 'logie'
  91. 'logies'
  92. ( R2 <- 'log' ) // replace with 'log' if in R2
  93. 'usion' 'ution'
  94. 'usions' 'utions'
  95. ( R2 <- 'u' ) // replace with 'u' if in R2
  96. 'ence'
  97. 'ences'
  98. ( R2 <- 'ent' ) // replace with 'ent' if in R2
  99. 'ement'
  100. 'ements'
  101. (
  102. RV delete // delete if in RV, then look at what now ends the word
  103. try (
  104. [substring] among(
  105. 'iv' (R2 delete ['at'] R2 delete) // delete 'iv' if in R2, then a preceding 'at' if in R2
  106. 'eus' ((R2 delete) or (R1<-'eux')) // delete if in R2, otherwise replace with 'eux' if in R1
  107. 'abl' 'iqU'
  108. (R2 delete) // delete if in R2
  109. 'i{e`}r' 'I{e`}r' //)
  110. (RV <-'i') //)--new 2 Sept 02
  111. )
  112. )
  113. )
  114. 'it{e'}'
  115. 'it{e'}s'
  116. (
  117. R2 delete // delete if in R2, then look at what now ends the word
  118. try (
  119. [substring] among(
  120. 'abil' ((R2 delete) or <-'abl') // delete if in R2, otherwise replace with 'abl'
  121. 'ic' ((R2 delete) or <-'iqU') // delete if in R2, otherwise replace with 'iqU'
  122. 'iv' (R2 delete) // delete if in R2
  123. )
  124. )
  125. )
  126. 'if' 'ive'
  127. 'ifs' 'ives'
  128. (
  129. R2 delete // delete if in R2
  130. try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' ) // 'at' deleted if in R2; then 'ic' deleted if in R2, otherwise replaced by 'iqU'
  131. )
  132. 'eaux' (<- 'eau') // replaced unconditionally
  133. 'aux' (R1 <- 'al') // replace with 'al' if in R1
  134. 'euse'
  135. 'euses'((R2 delete) or (R1<-'eux')) // delete if in R2, otherwise replace with 'eux' if in R1
  136. 'issement'
  137. 'issements'(R1 non-v delete) // verbal
  138. // fail(...) below forces entry to verb_suffix. -ment typically
  139. // follows the p.p., e.g 'confus{e'}ment'.
  140. 'amment' (RV fail(<- 'ant')) // if in RV: replace with 'ant', then signal failure
  141. 'emment' (RV fail(<- 'ent')) // if in RV: replace with 'ent', then signal failure
  142. 'ment'
  143. 'ments' (test(v RV) fail(delete)) // if preceded by a vowel lying in RV: delete, then signal failure
  144. // v is e,i,u,{e'},I or U
  145. )
  146. )
  147. define i_verb_suffix as setlimit tomark pV for ( // the search runs with the limit moved to pV
  148. [substring] among (
  149. '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
  150. 'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
  151. 'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
  152. 'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
  153. 'issez' 'issiez' 'issions' 'issons' 'it'
  154. (not 'H' non-v delete) // delete only when the suffix is preceded by a non-vowel other than the marker H
  155. )
  156. )
  157. define verb_suffix as setlimit tomark pV for ( // the search runs with the limit moved to pV
  158. [substring] among (
  159. 'ions'
  160. (R2 delete) // 'ions' is deleted only if in R2
  161. '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
  162. 'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
  163. 'erons' 'eront' 'ez' 'iez'
  164. // 'ons' //-best omitted
  165. (delete) // this group is deleted unconditionally
  166. '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
  167. 'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
  168. 'assions'
  169. (delete // this group is deleted unconditionally
  170. try(['e'] delete) // and an 'e' immediately before the removed suffix is deleted too
  171. )
  172. )
  173. )
  174. define keep_with_s 'aiou{e`}s' // a final 's' preceded by one of these letters is not deleted by residual_suffix (its 'Hi' case aside)
  175. define residual_suffix as ( // fallback pass; stem reaches it through the 'or' that follows the three suffix routines
  176. try(['s'] test ('Hi' or non-keep_with_s) delete) // delete a final 's' when preceded by 'Hi' or by a character outside keep_with_s
  177. setlimit tomark pV for ( // the search runs with the limit moved to pV
  178. [substring] among(
  179. 'ion' (R2 's' or 't' delete) // delete 'ion' if in R2 and preceded by 's' or 't'
  180. 'ier' 'i{e`}re'
  181. 'Ier' 'I{e`}re' (<-'i') // these four endings are replaced by 'i'
  182. 'e' (delete) // a final 'e' is deleted
  183. )
  184. )
  185. )
  186. define un_double as ( // shorten certain endings by one letter
  187. test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete // if one of these endings matches at the cursor, delete a single character there
  188. )
  189. define un_accent as ( // strip the accent from an e-acute or e-grave that is followed only by non-vowels
  190. atleast 1 non-v // move over one or more non-vowels
  191. [ '{e'}' or '{e`}' ] <-'e' // the next character must be e-acute or e-grave; replace it with plain 'e'
  192. )
  193. )
  194. define stem as ( // external entry point: runs the passes below in order
  195. do prelude // insert the U, I, Y, He, Hi markers
  196. do mark_regions // assign pV, p1, p2
  197. backwards ( // the suffix routines below run in backward mode
  198. do (
  199. (
  200. ( standard_suffix or // the alternatives are tried in this order until one succeeds
  201. i_verb_suffix or
  202. verb_suffix
  203. )
  204. and
  205. try( [ ('Y' ] <- 'i' ) or // after a successful removal: a Y at the cursor becomes 'i'
  206. ('{c,}'] <- 'c' ) // or a c-cedilla at the cursor becomes 'c'
  207. )
  208. ) or
  209. residual_suffix // reached only when none of the three suffix routines succeeded
  210. )
  211. // try(['ent'] RV delete) // is best omitted
  212. do un_double
  213. do un_accent
  214. )
  215. do postlude // turn the remaining markers back into ordinary letters
  216. )