You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

portuguese.sbl 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  // Internal routines of the stemmer. The first three run in forward mode;
  // the region tests and the four suffix passes are defined in backward mode.
  routines (
      prelude postlude mark_regions
      RV R1 R2
      standard_suffix verb_suffix
      residual_suffix residual_form
  )

  // The only routine exported to callers.
  externals ( stem )

  // Region marks computed by mark_regions and read by RV, R1 and R2.
  integers ( pV p1 p2 )

  // v is the vowel grouping.
  groupings ( v )

  // '{' and '}' delimit escapes such as {a'} or {U+00E1} inside string literals.
  stringescapes {}
  /* Accented letters referenced by the rules, grouped by base letter. */

  stringdef a' '{U+00E1}'  // a with acute
  stringdef a^ '{U+00E2}'  // a with circumflex, e.g. bota^nico
  stringdef a~ '{U+00E3}'  // a with tilde

  stringdef e' '{U+00E9}'  // e with acute
  stringdef e^ '{U+00EA}'  // e with circumflex

  stringdef i' '{U+00ED}'  // i with acute

  stringdef o' '{U+00F3}'  // o with acute
  stringdef o^ '{U+00F4}'  // o with circumflex
  stringdef o~ '{U+00F5}'  // o with tilde

  stringdef u' '{U+00FA}'  // u with acute

  stringdef c, '{U+00E7}'  // c with cedilla
  // Vowel grouping: the plain vowels, then the acute-accented ones, then the
  // circumflex-accented ones. The tilde vowels {a~} and {o~} are not members.
  define v 'aeiou' + '{a'}{e'}{i'}{o'}{u'}' + '{a^}{e^}{o^}'
  // Forward pass over the whole word: every a-tilde / o-tilde character is
  // rewritten as the two-character sequence 'a~' / 'o~'. The empty string
  // always matches, so any other character is simply stepped over with next;
  // the repeat ends when next can no longer advance.
  define prelude as (
      repeat (
          [substring] among(
              '{a~}'  ( <- 'a~' )
              '{o~}'  ( <- 'o~' )
              ''      ( next )
          )
      )
  )
  // Compute the marks pV, p1 and p2 that RV, R1 and R2 test against.
  // Each mark stays at limit when its pattern cannot be found.
  define mark_regions as (

      // Defaults: all three marks start at limit.
      $pV = limit
      $p1 = limit
      $p2 = limit

      // pV. The groupings are written out explicitly here; "or" binds more
      // tightly than sequencing, so this is the same parse as the unbracketed form.
      do (
          (
              // Word starts with a vowel:
              //   vowel, non-vowel -> go past the next vowel
              //   vowel, vowel     -> go past the next non-vowel
              ( v ( ( non-v gopast v ) or ( v gopast non-v ) ) )
              or
              // Word starts with a non-vowel:
              //   non-vowel, non-vowel -> go past the next vowel
              //   non-vowel, vowel     -> step over one more character
              ( non-v ( ( non-v gopast v ) or ( v next ) ) )
          )
          setmark pV
      )

      // p1: just after the first non-vowel that follows a vowel.
      // p2: the same pattern applied again, starting from p1.
      do (
          gopast v  gopast non-v  setmark p1
          gopast v  gopast non-v  setmark p2
      )
  )
  // Forward pass that reverses the prelude: each two-character sequence
  // 'a~' / 'o~' is turned back into the a-tilde / o-tilde character. Anything
  // else matches the empty string and is stepped over with next.
  define postlude as (
      repeat (
          [substring] among(
              'a~'  ( <- '{a~}' )
              'o~'  ( <- '{o~}' )
              ''    ( next )
          )
      )
  )
  55. backwardmode (
  // Region tests: each succeeds when its mark is less than or equal to the
  // current cursor position.
  define RV as ( $pV <= cursor )
  define R1 as ( $p1 <= cursor )
  define R2 as ( $p2 <= cursor )
  // Standard suffix removal. The longest suffix listed below is bracketed and
  // the action attached to its group is run; the routine fails if no suffix
  // matches or if the required region test fails.
  define standard_suffix as (
      [substring] among(

          // --- Removed when the suffix lies in R2 ---------------------------
          'eza' 'ezas'
          'ico' 'ica' 'icos' 'icas'
          'ismo' 'ismos'
          '{a'}vel'
          '{i'}vel'
          'ista' 'istas'
          'oso' 'osa' 'osos' 'osas'
          'amento' 'amentos'
          'imento' 'imentos'
          'adora' 'ador' 'a{c,}a~o'
          'adoras' 'adores' 'a{c,}o~es'    // no -ic test
          'ante' 'antes' '{a^}ncia'        // Note 1
              ( R2 delete )

          // --- -logia(s): replaced by 'log' when in R2 -----------------------
          'logia' 'logias'
              ( R2 <- 'log' )

          // --- -u{c,}a~o / -u{c,}o~es: replaced by 'u' when in R2 -------------
          'u{c,}a~o' 'u{c,}o~es'
              ( R2 <- 'u' )

          // --- -{e^}ncia(s): replaced by 'ente' when in R2 -------------------
          '{e^}ncia' '{e^}ncias'
              ( R2 <- 'ente' )

          // --- -amente: removed when in R1 ----------------------------------
          // Afterwards, optionally: a preceding iv / os / ic / ad is removed
          // when in R2, and after 'iv' a further preceding 'at' when in R2.
          'amente'
              (
                  R1 delete
                  try (
                      [substring] R2 delete among(
                          'iv' ( ['at'] R2 delete )
                          'os'
                          'ic'
                          'ad'
                      )
                  )
              )

          // --- -mente: removed when in R2 -----------------------------------
          // Afterwards, optionally: a preceding ante / avel / {i'}vel is
          // removed when in R2.
          'mente'
              (
                  R2 delete
                  try (
                      [substring] among(
                          'ante'                   // Note 1
                          'avel'
                          '{i'}vel'
                              ( R2 delete )
                      )
                  )
              )

          // --- -idade(s): removed when in R2 --------------------------------
          // Afterwards, optionally: a preceding abil / ic / iv is removed
          // when in R2.
          'idade' 'idades'
              (
                  R2 delete
                  try (
                      [substring] among(
                          'abil'
                          'ic'
                          'iv'
                              ( R2 delete )
                      )
                  )
              )

          // --- -iva / -ivo / -ivas / -ivos: removed when in R2 ---------------
          // Afterwards, optionally: a preceding 'at' is removed when in R2
          // (but not a further ['ic'] R2 delete).
          'iva' 'ivo'
          'ivas' 'ivos'
              (
                  R2 delete
                  try ( ['at'] R2 delete )
              )

          // --- -ira(s): replaced by 'ir' when in RV and preceded by 'e' ------
          // (-eira, -eiras are usually non-verbal)
          'ira' 'iras'
              (
                  RV 'e'
                  <- 'ir'
              )
      )
  )
  // Verb suffix removal. The search is limited to the part of the word
  // bounded by the pV mark; the longest listed ending found there is deleted.
  // The routine fails if none of the endings matches.
  define verb_suffix as setlimit tomark pV for (
      [substring] among(
          'ada' 'ida' 'ia'
          'aria' 'eria' 'iria'
          'ar{a'}' 'ara' 'er{a'}' 'era' 'ir{a'}'
          'ava'
          'asse' 'esse' 'isse'
          'aste' 'este' 'iste'
          'ei' 'arei' 'erei' 'irei'
          'am' 'iam' 'ariam' 'eriam' 'iriam'
          'aram' 'eram' 'iram' 'avam'
          'em' 'arem' 'erem' 'irem'
          'assem' 'essem' 'issem'
          'ado' 'ido'
          'ando' 'endo' 'indo'
          'ara~o' 'era~o' 'ira~o'
          'ar' 'er' 'ir'
          'as' 'adas' 'idas' 'ias'
          'arias' 'erias' 'irias'
          'ar{a'}s' 'aras' 'er{a'}s' 'eras' 'ir{a'}s'
          'avas'
          'es' 'ardes' 'erdes' 'irdes'
          'ares' 'eres' 'ires'
          'asses' 'esses' 'isses'
          'astes' 'estes' 'istes'
          'is' 'ais' 'eis' '{i'}eis'
          'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
          '{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
          '{a'}sseis' '{e'}sseis' '{i'}sseis'
          '{a'}veis'
          'ados' 'idos'
          '{a'}mos' 'amos' '{i'}amos'
          'ar{i'}amos' 'er{i'}amos' 'ir{i'}amos'
          '{a'}ramos' '{e'}ramos' '{i'}ramos'
          '{a'}vamos'
          'emos' 'aremos' 'eremos' 'iremos'
          '{a'}ssemos' '{e^}ssemos' '{i'}ssemos'
          'imos' 'armos' 'ermos' 'irmos'
          'eu' 'iu' 'ou'
          'ira' 'iras'
              ( delete )
      )
  )
  // Residual suffix: a final 'os' or a single final vowel from the list below
  // is deleted, provided it lies in RV.
  define residual_suffix as (
      [substring] among(
          'os'
          'a' 'i' 'o'
          '{a'}' '{i'}' '{o'}'
              ( RV delete )
      )
  )
  // Residual form.
  //  * A final e / e-acute / e-circumflex is deleted when in RV. If what now
  //    precedes is 'gu' or 'ci', the 'u' (resp. 'i') is deleted as well,
  //    provided it is in RV; the preceding 'g' / 'c' is only tested for.
  //  * A final c-cedilla is replaced by a plain 'c'.
  define residual_form as (
      [substring] among(
          'e' '{e'}' '{e^}'
              (
                  RV delete
                  [
                  // "or" binds tighter than sequencing; the grouping is spelled out.
                  ( ( 'u'] test 'g' ) or ( 'i'] test 'c' ) )
                  RV delete
              )
          '{c,}'
              ( <- 'c' )
      )
  )
  176. )
  // Entry point of the stemmer.
  //  1. prelude and mark_regions run forwards over the word.
  //  2. Working backwards: standard_suffix is tried, then verb_suffix if it
  //     fails. When one of them succeeds, an 'i' preceded by 'c' is also
  //     deleted from the same starting position if it is in RV.
  //     If neither succeeds, residual_suffix is tried instead.
  //     residual_form is attempted in every case.
  //  3. postlude runs forwards.
  // Every step is wrapped in do, so a failing step never makes stem fail.
  define stem as (
      do prelude
      do mark_regions

      backwards (
          do (
              (
                  ( standard_suffix or verb_suffix )
                  and
                  do ( ['i'] test 'c' RV delete )
              )
              or
              residual_suffix
          )
          do residual_form
      )

      do postlude
  )
  191. /*
  192. Note 1: additions of 15 Jun 2005
  193. */