123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- routines (
- prelude postlude mark_regions
- RV R1 R2
- standard_suffix
- i_verb_suffix
- verb_suffix
- residual_suffix
- un_double
- un_accent
- )
-
- externals ( stem )
-
- integers ( pV p1 p2 )
-
- groupings ( v keep_with_s )
-
- stringescapes {}
-
- /* special characters (in ISO Latin I) */
-
- stringdef a^ hex 'E2' // a-circumflex
- stringdef a` hex 'E0' // a-grave
- stringdef c, hex 'E7' // c-cedilla
-
- stringdef e" hex 'EB' // e-diaeresis (rare)
- stringdef e' hex 'E9' // e-acute
- stringdef e^ hex 'EA' // e-circumflex
- stringdef e` hex 'E8' // e-grave
- stringdef i" hex 'EF' // i-diaeresis
- stringdef i^ hex 'EE' // i-circumflex
- stringdef o^ hex 'F4' // o-circumflex
- stringdef u^ hex 'FB' // u-circumflex
- stringdef u` hex 'F9' // u-grave
-
- define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
-
- define prelude as repeat goto (
-
- ( v [ ('u' ] v <- 'U') or
- ('i' ] v <- 'I') or
- ('y' ] <- 'Y')
- )
- or
- ( ['y'] v <- 'Y' )
- or
- ( 'q' ['u'] <- 'U' )
- )
-
- define mark_regions as (
-
- $pV = limit
- $p1 = limit
- $p2 = limit // defaults
-
- do (
- ( v v next )
- or
- among ( // this exception list begun Nov 2006
- 'par' // paris, parie, pari
- 'col' // colis
- 'tap' // tapis
- // extensions possible here
- )
- or
- ( next gopast v )
- setmark pV
- )
- do (
- gopast v gopast non-v setmark p1
- gopast v gopast non-v setmark p2
- )
- )
-
- define postlude as repeat (
-
- [substring] among(
- 'I' (<- 'i')
- 'U' (<- 'u')
- 'Y' (<- 'y')
- '' (next)
- )
- )
-
- backwardmode (
-
- define RV as $pV <= cursor
- define R1 as $p1 <= cursor
- define R2 as $p2 <= cursor
-
- define standard_suffix as (
- [substring] among(
-
- 'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
- 'ances' 'iqUes' 'ismes' 'ables' 'istes'
- ( R2 delete )
- 'atrice' 'ateur' 'ation'
- 'atrices' 'ateurs' 'ations'
- ( R2 delete
- try ( ['ic'] (R2 delete) or <-'iqU' )
- )
- 'logie'
- 'logies'
- ( R2 <- 'log' )
- 'usion' 'ution'
- 'usions' 'utions'
- ( R2 <- 'u' )
- 'ence'
- 'ences'
- ( R2 <- 'ent' )
- 'ement'
- 'ements'
- (
- RV delete
- try (
- [substring] among(
- 'iv' (R2 delete ['at'] R2 delete)
- 'eus' ((R2 delete) or (R1<-'eux'))
- 'abl' 'iqU'
- (R2 delete)
- 'i{e`}r' 'I{e`}r' //)
- (RV <-'i') //)--new 2 Sept 02
- )
- )
- )
- 'it{e'}'
- 'it{e'}s'
- (
- R2 delete
- try (
- [substring] among(
- 'abil' ((R2 delete) or <-'abl')
- 'ic' ((R2 delete) or <-'iqU')
- 'iv' (R2 delete)
- )
- )
- )
- 'if' 'ive'
- 'ifs' 'ives'
- (
- R2 delete
- try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
- )
- 'eaux' (<- 'eau')
- 'aux' (R1 <- 'al')
- 'euse'
- 'euses'((R2 delete) or (R1<-'eux'))
-
- 'issement'
- 'issements'(R1 non-v delete) // verbal
-
- // fail(...) below forces entry to verb_suffix. -ment typically
- // follows the p.p., e.g 'confus{e'}ment'.
-
- 'amment' (RV fail(<- 'ant'))
- 'emment' (RV fail(<- 'ent'))
- 'ment'
- 'ments' (test(v RV) fail(delete))
- // v is e,i,u,{e'},I or U
- )
- )
-
- define i_verb_suffix as setlimit tomark pV for (
- [substring] among (
- '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
- 'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
- 'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
- 'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
- 'issez' 'issiez' 'issions' 'issons' 'it'
- (non-v delete)
- )
- )
-
- define verb_suffix as setlimit tomark pV for (
- [substring] among (
- 'ions'
- (R2 delete)
-
- '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
- 'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
- 'erons' 'eront' 'ez' 'iez'
-
- // 'ons' //-best omitted
-
- (delete)
-
- '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
- 'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
- 'assions'
- (delete
- try(['e'] delete)
- )
- )
- )
-
- define keep_with_s 'aiou{e`}s'
-
- define residual_suffix as (
- try(['s'] test non-keep_with_s delete)
- setlimit tomark pV for (
- [substring] among(
- 'ion' (R2 's' or 't' delete)
- 'ier' 'i{e`}re'
- 'Ier' 'I{e`}re' (<-'i')
- 'e' (delete)
- '{e"}' ('gu' delete)
- )
- )
- )
-
- define un_double as (
- test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
- )
-
- define un_accent as (
- atleast 1 non-v
- [ '{e'}' or '{e`}' ] <-'e'
- )
- )
-
- define stem as (
-
- do prelude
- do mark_regions
- backwards (
-
- do (
- (
- ( standard_suffix or
- i_verb_suffix or
- verb_suffix
- )
- and
- try( [ ('Y' ] <- 'i' ) or
- ('{c,}'] <- 'c' )
- )
- ) or
- residual_suffix
- )
-
- // try(['ent'] RV delete) // is best omitted
-
- do un_double
- do un_accent
- )
- do postlude
- )
|