Remove bloody submodules.

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-31 17:38:02 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-31 17:38:02 +0000
commit: 2375dba898b481837879940dfdcf3ea85248fe01 (patch)
tree: cced5fb680e9a362d1de25630bff537865d38365 /contrib/snowball/algorithms
parent: 1543c98d38ffb84a1e405081436d0a25bee713a6 (diff)
download: rspamd-2375dba898b481837879940dfdcf3ea85248fe01.tar.gz
rspamd-2375dba898b481837879940dfdcf3ea85248fe01.zip
31 files changed, 5890 insertions, 0 deletions
diff --git a/contrib/snowball/algorithms/danish/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/danish/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..0a8190a08
--- /dev/null
+++ b/contrib/snowball/algorithms/danish/stem_ISO_8859_1.sbl
@@ -0,0 +1,91 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+           undouble
+)
+
+externals ( stem )
+
+strings ( ch )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef ae   hex 'E6'
+stringdef ao   hex 'E5'
+stringdef o/   hex 'F8'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'abcdfghjklmnoprtvyz{ao}'
+
+define mark_regions as (  // computes p1, the left boundary used by the suffix routines below
+
+    $p1 = limit  // default: p1 at the end of the word
+
+    test ( hop 3 setmark x )  // x = position after the first 3 characters; fails on shorter words
+    goto v gopast non-v  setmark p1  // p1 = just past the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // raise p1 to x when it would otherwise lie before x
+)
+
+backwardmode (
+
+    define main_suffix as (  // handles the main ending list; match is limited to the part from p1 on
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+
+            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
+            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
+            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
+            'erets' 'et' 'eret'
+                (delete)  // all endings listed above are simply removed
+            's'
+                (s_ending delete)  // 's' is removed only when preceded by a character of s_ending
+        )
+    )
+
+    define consonant_pair as (  // on a final gd/dt/gt/kt (from p1 on), deletes the last letter
+        test (  // check for the pair; test restores the cursor afterwards
+            setlimit tomark p1 for ([substring])
+            among(
+                'gd' // significant in the call from other_suffix
+                'dt' 'gt' 'kt'
+            )
+        )
+        next] delete  // step over one character and close the bracket opened by '[' above
+    )
+
+    define other_suffix as (  // handles the derivational endings listed in the among below
+        do ( ['st'] 'ig' delete )  // final 'st' preceded by 'ig' is removed first
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+            'ig' 'lig' 'elig' 'els'
+                (delete do consonant_pair)  // remove the ending, then retry consonant_pair
+            'l{o/}st'
+                (<-'l{o/}s')  // replace by the same string without the final 't'
+        )
+    )
+    define undouble as (  // removes the final non-vowel when the same character precedes it
+        setlimit tomark p1 for ([non-v] ->ch)  // bracket the final non-vowel and copy it into ch
+        ch  // the text before it must match ch again
+        delete  // delete the bracketed final character
+    )
+)
+
+define stem as (  // external entry point: runs the routines below in this fixed order
+
+    do mark_regions  // 'do' ignores failure, so the later steps always run
+    backwards (  // the suffix routines work from the end of the word
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+        do undouble
+    )
+)
diff --git a/contrib/snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..1131a1cb7
--- /dev/null
+++ b/contrib/snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,91 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+           undouble
+)
+
+externals ( stem )
+
+strings ( ch )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef ae   hex '91'
+stringdef ao   hex '86'
+stringdef o/   hex '9B'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'abcdfghjklmnoprtvyz{ao}'
+
+define mark_regions as (  // computes p1, the left boundary used by the suffix routines below
+
+    $p1 = limit  // default: p1 at the end of the word
+
+    test ( hop 3 setmark x )  // x = position after the first 3 characters; fails on shorter words
+    goto v gopast non-v  setmark p1  // p1 = just past the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // raise p1 to x when it would otherwise lie before x
+)
+
+backwardmode (
+
+    define main_suffix as (  // handles the main ending list; match is limited to the part from p1 on
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+
+            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
+            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
+            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
+            'erets' 'et' 'eret'
+                (delete)  // all endings listed above are simply removed
+            's'
+                (s_ending delete)  // 's' is removed only when preceded by a character of s_ending
+        )
+    )
+
+    define consonant_pair as (  // on a final gd/dt/gt/kt (from p1 on), deletes the last letter
+        test (  // check for the pair; test restores the cursor afterwards
+            setlimit tomark p1 for ([substring])
+            among(
+                'gd' // significant in the call from other_suffix
+                'dt' 'gt' 'kt'
+            )
+        )
+        next] delete  // step over one character and close the bracket opened by '[' above
+    )
+
+    define other_suffix as (  // handles the derivational endings listed in the among below
+        do ( ['st'] 'ig' delete )  // final 'st' preceded by 'ig' is removed first
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+            'ig' 'lig' 'elig' 'els'
+                (delete do consonant_pair)  // remove the ending, then retry consonant_pair
+            'l{o/}st'
+                (<-'l{o/}s')  // replace by the same string without the final 't'
+        )
+    )
+    define undouble as (  // removes the final non-vowel when the same character precedes it
+        setlimit tomark p1 for ([non-v] ->ch)  // bracket the final non-vowel and copy it into ch
+        ch  // the text before it must match ch again
+        delete  // delete the bracketed final character
+    )
+)
+
+define stem as (  // external entry point: runs the routines below in this fixed order
+
+    do mark_regions  // 'do' ignores failure, so the later steps always run
+    backwards (  // the suffix routines work from the end of the word
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+        do undouble
+    )
+)
diff --git a/contrib/snowball/algorithms/dutch/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/dutch/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..f7609f766
--- /dev/null
+++ b/contrib/snowball/algorithms/dutch/stem_ISO_8859_1.sbl
@@ -0,0 +1,164 @@
+routines (
+           prelude postlude
+           e_ending
+           en_ending
+           mark_regions
+           R1 R2
+           undouble
+           standard_suffix
+)
+
+externals ( stem )
+
+booleans ( e_found )
+
+integers ( p1 p2 )
+
+groupings ( v v_I v_j )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef e"   hex 'EB'
+stringdef i"   hex 'EF'
+stringdef o"   hex 'F6'
+stringdef u"   hex 'FC'
+
+stringdef a'   hex 'E1'
+stringdef e'   hex 'E9'
+stringdef i'   hex 'ED'
+stringdef o'   hex 'F3'
+stringdef u'   hex 'FA'
+
+stringdef e`   hex 'E8'
+
+define v       'aeiouy{e`}'
+define v_I     v + 'I'
+define v_j     v + 'j'
+
+define prelude as (  // strips the listed accents and rewrites certain i / y as upper-case markers
+    test repeat (  // scan the word; test restores the cursor when the scan is done
+        [substring] among(
+            '{a"}' '{a'}'
+                (<- 'a')
+            '{e"}' '{e'}'
+                (<- 'e')
+            '{i"}' '{i'}'
+                (<- 'i')
+            '{o"}' '{o'}'
+                (<- 'o')
+            '{u"}' '{u'}'
+                (<- 'u')
+            ''  (next)  // no listed accented letter here: advance one character
+        ) //or next
+    )
+    try(['y'] <- 'Y')  // a y at the cursor (start of the word) becomes Y
+    repeat goto (  // after a vowel: i followed by a vowel -> I, y -> Y
+        v [('i'] v <- 'I') or
+           ('y']   <- 'Y')
+    )
+)
+
+define mark_regions as (  // computes p1 and p2, the region starts tested by R1 and R2
+
+    $p1 = limit  // defaults: both marks at the end of the word
+    $p2 = limit
+
+    gopast v  gopast non-v  setmark p1  // p1 = just past the first non-vowel after a vowel
+    try($p1 < 3  $p1 = 3)  // at least 3
+    gopast v  gopast non-v  setmark p2  // p2 = same search continued from the cursor after p1
+
+)
+
+define postlude as repeat (  // turns the Y and I markers written by prelude back into y and i
+
+    [substring] among(
+        'Y'  (<- 'y')
+        'I'  (<- 'i')
+        ''   (next)  // any other character: advance; repeat ends when next fails
+    ) //or next
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or beyond mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or beyond mark p2
+
+    define undouble as (  // text ending in kk, dd or tt loses its last letter
+        test among('kk' 'dd' 'tt') [next] delete  // test restores the cursor before [next]
+    )
+
+    define e_ending as (  // removes a final e and records that in e_found
+        unset e_found  // cleared first, so it stays false if the next line fails
+        ['e'] R1 test non-v delete  // the e must be in R1 and be preceded by a non-vowel
+        set e_found
+        undouble  // then undouble a final kk/dd/tt
+    )
+
+    define en_ending as (  // deletes the slice bracketed by the caller when the conditions hold
+        R1 non-v and not 'gem' delete  // in R1, preceded by a non-vowel, and not preceded by 'gem'
+        undouble  // then undouble a final kk/dd/tt
+    )
+
+    define standard_suffix as (  // runs the suffix steps in order; each 'do' ignores failure
+        do (  // step 1: the endings listed in the among below
+            [substring] among(
+                'heden'
+                (   R1 <- 'heid'
+                )
+                'en' 'ene'
+                (   en_ending
+                )
+                's' 'se'
+                (   R1 non-v_j delete  // preceded by a character that is not in v_j
+                )
+            )
+        )
+        do e_ending  // step 2: final e
+
+        do ( ['heid'] R2 not 'c' delete  // step 3: 'heid' in R2 not preceded by c ...
+             ['en'] en_ending  // ... followed by removal of a preceding 'en' via en_ending
+           )
+
+        do (  // step 4: the endings listed in the among below
+            [substring] among(
+                'end' 'ing'
+                (   R2 delete
+                    (['ig'] R2 not 'e' delete) or undouble  // then a preceding 'ig', else undouble
+                )
+                'ig'
+                (   R2 not 'e' delete  // only when not preceded by e
+                )
+                'lijk'
+                (   R2 delete e_ending
+                )
+                'baar'
+                (   R2 delete
+                )
+                'bar'
+                (   R2 e_found delete  // only when e_found is set (see e_ending)
+                )
+            )
+        )
+        do (  // step 5: shorten a double vowel
+            non-v_I  // last character is not in v_I
+            test (
+                among ('aa' 'ee' 'oo' 'uu')  // preceded by one of these pairs ...
+                non-v  // ... which is itself preceded by a non-vowel
+            )
+            [next] delete  // delete one character of the pair
+        )
+    )
+)
+
+define stem as (  // external entry point
+
+        do prelude  // forward pass: accents and I / Y markers
+        do mark_regions  // sets p1 and p2
+        backwards
+            do standard_suffix  // only this routine runs in backward mode
+        do postlude  // forward pass: restore i / y
+)
diff --git a/contrib/snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..15b8718d1
--- /dev/null
+++ b/contrib/snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,164 @@
+routines (
+           prelude postlude
+           e_ending
+           en_ending
+           mark_regions
+           R1 R2
+           undouble
+           standard_suffix
+)
+
+externals ( stem )
+
+booleans ( e_found )
+
+integers ( p1 p2 )
+
+groupings ( v v_I v_j )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a"   hex '84'
+stringdef e"   hex '89'
+stringdef i"   hex '8B'
+stringdef o"   hex '94'
+stringdef u"   hex '81'
+
+stringdef a'   hex 'A0'
+stringdef e'   hex '82'
+stringdef i'   hex 'A1'
+stringdef o'   hex 'A2'
+stringdef u'   hex 'A3'
+
+stringdef e`   hex '8A'
+
+define v       'aeiouy{e`}'
+define v_I     v + 'I'
+define v_j     v + 'j'
+
+define prelude as (  // strips the listed accents and rewrites certain i / y as upper-case markers
+    test repeat (  // scan the word; test restores the cursor when the scan is done
+        [substring] among(
+            '{a"}' '{a'}'
+                (<- 'a')
+            '{e"}' '{e'}'
+                (<- 'e')
+            '{i"}' '{i'}'
+                (<- 'i')
+            '{o"}' '{o'}'
+                (<- 'o')
+            '{u"}' '{u'}'
+                (<- 'u')
+            ''  (next)  // no listed accented letter here: advance one character
+        ) //or next
+    )
+    try(['y'] <- 'Y')  // a y at the cursor (start of the word) becomes Y
+    repeat goto (  // after a vowel: i followed by a vowel -> I, y -> Y
+        v [('i'] v <- 'I') or
+           ('y']   <- 'Y')
+    )
+)
+
+define mark_regions as (  // computes p1 and p2, the region starts tested by R1 and R2
+
+    $p1 = limit  // defaults: both marks at the end of the word
+    $p2 = limit
+
+    gopast v  gopast non-v  setmark p1  // p1 = just past the first non-vowel after a vowel
+    try($p1 < 3  $p1 = 3)  // at least 3
+    gopast v  gopast non-v  setmark p2  // p2 = same search continued from the cursor after p1
+
+)
+
+define postlude as repeat (  // turns the Y and I markers written by prelude back into y and i
+
+    [substring] among(
+        'Y'  (<- 'y')
+        'I'  (<- 'i')
+        ''   (next)  // any other character: advance; repeat ends when next fails
+    ) //or next
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or beyond mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or beyond mark p2
+
+    define undouble as (  // text ending in kk, dd or tt loses its last letter
+        test among('kk' 'dd' 'tt') [next] delete  // test restores the cursor before [next]
+    )
+
+    define e_ending as (  // removes a final e and records that in e_found
+        unset e_found  // cleared first, so it stays false if the next line fails
+        ['e'] R1 test non-v delete  // the e must be in R1 and be preceded by a non-vowel
+        set e_found
+        undouble  // then undouble a final kk/dd/tt
+    )
+
+    define en_ending as (  // deletes the slice bracketed by the caller when the conditions hold
+        R1 non-v and not 'gem' delete  // in R1, preceded by a non-vowel, and not preceded by 'gem'
+        undouble  // then undouble a final kk/dd/tt
+    )
+
+    define standard_suffix as (  // runs the suffix steps in order; each 'do' ignores failure
+        do (  // step 1: the endings listed in the among below
+            [substring] among(
+                'heden'
+                (   R1 <- 'heid'
+                )
+                'en' 'ene'
+                (   en_ending
+                )
+                's' 'se'
+                (   R1 non-v_j delete  // preceded by a character that is not in v_j
+                )
+            )
+        )
+        do e_ending  // step 2: final e
+
+        do ( ['heid'] R2 not 'c' delete  // step 3: 'heid' in R2 not preceded by c ...
+             ['en'] en_ending  // ... followed by removal of a preceding 'en' via en_ending
+           )
+
+        do (  // step 4: the endings listed in the among below
+            [substring] among(
+                'end' 'ing'
+                (   R2 delete
+                    (['ig'] R2 not 'e' delete) or undouble  // then a preceding 'ig', else undouble
+                )
+                'ig'
+                (   R2 not 'e' delete  // only when not preceded by e
+                )
+                'lijk'
+                (   R2 delete e_ending
+                )
+                'baar'
+                (   R2 delete
+                )
+                'bar'
+                (   R2 e_found delete  // only when e_found is set (see e_ending)
+                )
+            )
+        )
+        do (  // step 5: shorten a double vowel
+            non-v_I  // last character is not in v_I
+            test (
+                among ('aa' 'ee' 'oo' 'uu')  // preceded by one of these pairs ...
+                non-v  // ... which is itself preceded by a non-vowel
+            )
+            [next] delete  // delete one character of the pair
+        )
+    )
+)
+
+define stem as (  // external entry point
+
+        do prelude  // forward pass: accents and I / Y markers
+        do mark_regions  // sets p1 and p2
+        backwards
+            do standard_suffix  // only this routine runs in backward mode
+        do postlude  // forward pass: restore i / y
+)
diff --git a/contrib/snowball/algorithms/english/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/english/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..fe18d7a91
--- /dev/null
+++ b/contrib/snowball/algorithms/english/stem_ISO_8859_1.sbl
@@ -0,0 +1,229 @@
+integers ( p1 p2 )
+booleans ( Y_found )
+
+routines (
+    prelude postlude
+    mark_regions
+    shortv
+    R1 R2
+    Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
+    exception1
+    exception2
+)
+
+externals ( stem )
+
+groupings ( v v_WXY valid_LI )
+
+stringescapes {}
+
+define v        'aeiouy'
+define v_WXY    v + 'wxY'
+
+define valid_LI 'cdeghkmnrt'
+
+define prelude as (  // removes a leading apostrophe and marks certain y as Y
+    unset Y_found  // Y_found records whether any y was rewritten (read by postlude)
+    do ( ['{'}'] delete)  // apostrophe at the start of the word is deleted
+    do ( ['y'] <-'Y' set Y_found)  // y at the start of the word -> Y
+    do repeat(goto (v ['y']) <-'Y' set Y_found)  // every y directly after a vowel -> Y
+)
+
+define mark_regions as (  // computes p1 and p2, the region starts tested by R1 and R2
+    $p1 = limit  // defaults: both marks at the end of the word
+    $p2 = limit
+    do(
+        among (  // words starting with one of these prefixes get p1 right after the prefix
+            'gener'
+            'commun'  //  added May 2005
+            'arsen'   //  added Nov 2006 (arsenic/arsenal)
+            // ... extensions possible here ...
+        ) or (gopast v  gopast non-v)  // otherwise: just past the first non-vowel after a vowel
+        setmark p1
+        gopast v  gopast non-v  setmark p2  // p2 = same search continued from p1
+    )
+)
+
+backwardmode (
+
+    define shortv as (  // backward test on the characters before the cursor
+        ( non-v_WXY v non-v )  // reading backwards: a char not in v_WXY, a vowel, a non-vowel
+        or
+        ( non-v v atlimit )  // or: a non-vowel, then a vowel that is at the backward limit
+    )
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or beyond mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or beyond mark p2
+
+    define Step_1a as (  // apostrophe endings, then the s-type endings listed below
+        try (  // optional: remove a trailing apostrophe / apostrophe-s form
+            [substring] among (
+                '{'}' '{'}s' '{'}s{'}'
+                       (delete)
+            )
+        )
+        [substring] among (
+            'sses' (<-'ss')
+            'ied' 'ies'
+                   ((hop 2 <-'i') or <-'ie')  // 'i' if at least 2 characters precede, else 'ie'
+            's'    (next gopast v delete)  // delete s if a vowel occurs before the preceding char
+            'us' 'ss'  // matched with no action, so the plain 's' rule does not apply to them
+        )
+    )
+
+    define Step_1b as (  // handles the eed / ed / ing family of endings
+        [substring] among (
+            'eed' 'eedly'
+                (R1 <-'ee')  // replaced by 'ee' when in R1
+            'ed' 'edly' 'ing' 'ingly'
+                (
+                test gopast v  delete  // delete only if a vowel occurs in the preceding text
+                test substring among(  // then tidy what is left at the end of the word
+                    'at' 'bl' 'iz'
+                         (<+ 'e')  // insert e after these endings
+                    'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
+                    // ignoring double c, h, j, k, q, v, w, and x
+                         ([next]  delete)  // remove the last letter of the double
+                    ''   (atmark p1  test shortv  <+ 'e')  // cursor at p1 and shortv holds: insert e
+                )
+            )
+        )
+    )
+
+    define Step_1c as (  // final y or Y becomes i under the two conditions below
+        ['y' or 'Y']
+        non-v not atlimit  // preceded by a non-vowel that is not at the backward limit
+        <-'i'
+    )
+
+    define Step_2 as (  // suffix replacements; R1 is tested once, before any action runs
+        [substring] R1 among (  // the matched ending must lie in R1
+            'tional'  (<-'tion')
+            'enci'    (<-'ence')
+            'anci'    (<-'ance')
+            'abli'    (<-'able')
+            'entli'   (<-'ent')
+            'izer' 'ization'
+                      (<-'ize')
+            'ational' 'ation' 'ator'
+                      (<-'ate')
+            'alism' 'aliti' 'alli'
+                      (<-'al')
+            'fulness' (<-'ful')
+            'ousli' 'ousness'
+                      (<-'ous')
+            'iveness' 'iviti'
+                      (<-'ive')
+            'biliti' 'bli'
+                      (<-'ble')
+            'ogi'     ('l' <-'og')  // only when preceded by l
+            'fulli'   (<-'ful')
+            'lessli'  (<-'less')
+            'li'      (valid_LI delete)  // only when preceded by a character of valid_LI
+        )
+    )
+
+    define Step_3 as (  // further suffix replacements; the matched ending must lie in R1
+        [substring] R1 among (
+            'tional'  (<- 'tion')
+            'ational' (<- 'ate')
+            'alize'   (<-'al')
+            'icate' 'iciti' 'ical'
+                      (<-'ic')
+            'ful' 'ness'
+                      (delete)
+            'ative'
+                      (R2 delete)  // 'R2' added Dec 2001
+        )
+    )
+
+    define Step_4 as (  // deletes the listed endings; the matched ending must lie in R2
+        [substring] R2 among (
+            'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
+            'ment' 'ent' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
+                      (delete)
+            'ion'     ('s' or 't' delete)  // only when preceded by s or t
+        )
+    )
+
+    define Step_5 as (  // removes a final e or l under the conditions below
+        [substring] among (
+            'e' (R2 or (R1 not shortv) delete)  // e in R2, or in R1 when shortv does not hold
+            'l' (R2 'l' delete)  // l in R2 and preceded by another l
+        )
+    )
+
+    define exception2 as (  // succeeds when the text is exactly one of the words listed below
+
+        [substring] atlimit among(  // atlimit: the match must reach the backward limit
+            'inning' 'outing' 'canning' 'herring' 'earring'
+            'proceed' 'exceed' 'succeed'
+
+            // ... extensions possible here ...
+
+        )
+    )
+)
+
+define exception1 as (  // forward-mode whole-word exceptions; succeeds when the word is listed
+
+    [substring] atlimit among(  // atlimit: the match must extend to the end of the word
+
+        /* special changes: */
+
+        'skis'      (<-'ski')
+        'skies'     (<-'sky')
+        'dying'     (<-'die')
+        'lying'     (<-'lie')
+        'tying'     (<-'tie')
+
+        /* special -LY cases */
+
+        'idly'      (<-'idl')
+        'gently'    (<-'gentl')
+        'ugly'      (<-'ugli')
+        'early'     (<-'earli')
+        'only'      (<-'onli')
+        'singly'    (<-'singl')
+
+        // ... extensions possible here ...
+
+        /* invariant forms: */
+
+        'sky'
+        'news'
+        'howe'
+
+        'atlas' 'cosmos' 'bias' 'andes' // not plural forms
+
+        // ... extensions possible here ...
+    )
+)
+
+define postlude as (Y_found  repeat(goto (['Y']) <-'y'))  // if prelude set Y_found, turn every Y back into y
+
+define stem as (  // external entry point
+
+    exception1 or  // listed exception words are handled here and nothing else runs
+    not hop 3 or (  // words with fewer than 3 characters are left unchanged
+        do prelude
+        do mark_regions
+        backwards (
+
+            do Step_1a
+
+            exception2 or (  // words listed in exception2 skip all remaining steps
+
+                do Step_1b
+                do Step_1c
+
+                do Step_2
+                do Step_3
+                do Step_4
+
+                do Step_5
+            )
+        )
+        do postlude  // restore y from the Y markers
+    )
+)
diff --git a/contrib/snowball/algorithms/finnish/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/finnish/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..9ac74f292
--- /dev/null
+++ b/contrib/snowball/algorithms/finnish/stem_ISO_8859_1.sbl
@@ -0,0 +1,196 @@
+
+/* Finnish stemmer.
+
+   Numbers in square brackets refer to the sections in
+   Fred Karlsson, Finnish: An Essential Grammar. Routledge, 1999
+   ISBN 0-415-20705-3
+
+*/
+
+routines (
+           mark_regions
+           R2
+           particle_etc possessive
+           LONG VI
+           case_ending
+           i_plural
+           t_plural
+           other_endings
+           tidy
+)
+
+externals ( stem )
+
+integers ( p1 p2 )
+strings ( x )
+booleans ( ending_removed )
+groupings ( AEI V1 V2 particle_end )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef o"   hex 'F6'
+
+define AEI 'a{a"}ei'
+define V1 'aeiouy{a"}{o"}'
+define V2 'aeiou{a"}{o"}'
+define particle_end V1 + 'nt'
+
+define mark_regions as (  // computes the marks p1 and p2 used as limits by the suffix routines
+
+    $p1 = limit  // defaults: both marks at the end of the word
+    $p2 = limit
+
+    goto V1  gopast non-V1  setmark p1  // p1 = just past the first non-V1 character after a V1 vowel
+    goto V1  gopast non-V1  setmark p2  // p2 = same search continued from p1
+)
+
+backwardmode (
+
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or beyond mark p2
+
+    define particle_etc as (  // deletes a particle or the 'sti' ending found from p1 on
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+            'kin'
+            'kaan' 'k{a"}{a"}n'
+            'ko'   'k{o"}'
+            'han'  'h{a"}n'
+            'pa'   'p{a"}'    // Particles [91]
+                (particle_end)  // must be preceded by a character of particle_end
+            'sti'             // Adverb [87]
+                (R2)  // must lie in R2
+        )
+        delete
+    )
+    define possessive as (    // [36]
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+            'si'
+                (not 'k' delete)  // take 'ksi' as the Translative case (as labelled in case_ending)
+            'ni'
+                (delete ['kse'] <- 'ksi') // kseni = ksi + ni
+            'nsa' 'ns{a"}'
+            'mme'
+            'nne'
+                (delete)
+            /* Now for Vn possessives after case endings: [36] */
+            'an'
+                (among('ta' 'ssa' 'sta' 'lla' 'lta' 'na') delete)  // only after one of these endings
+            '{a"}n'
+                (among('t{a"}' 'ss{a"}' 'st{a"}'
+                       'll{a"}' 'lt{a"}' 'n{a"}') delete)
+            'en'
+                (among('lle' 'ine') delete)
+        )
+    )
+
+    define LONG as  // matches one of the doubled vowels listed below
+        among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}')
+
+    define VI as ('i' V2)  // backward match: an i that is preceded by a V2 vowel
+
+    define case_ending as (  // deletes a case ending found from p1 on and sets ending_removed
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+            'han'    ('a')          //-.
+            'hen'    ('e')          // |
+            'hin'    ('i')          // |
+            'hon'    ('o')          // |
+            'h{a"}n' ('{a"}')       // Illative   [43]
+            'h{o"}n' ('{o"}')       // |
+            'siin'   VI             // |
+            'seen'   LONG           //-'
+
+            'den'    VI
+            'tten'   VI             // Genitive plurals [34]
+                     ()
+            'n'                     // Genitive or Illative
+                ( try ( LONG // Illative
+                        or 'ie' // Genitive
+                          and next ]
+                      )
+                  /* otherwise Genitive */
+                )
+
+            'a' '{a"}'              //-.
+                     (V1 non-V1)    // |
+            'tta' 'tt{a"}'          // Partitive  [32]
+                     ('e')          // |
+            'ta' 't{a"}'            //-'
+
+            'ssa' 'ss{a"}'          // Inessive   [41]
+            'sta' 'st{a"}'          // Elative    [42]
+
+            'lla' 'll{a"}'          // Adessive   [44]
+            'lta' 'lt{a"}'          // Ablative   [51]
+            'lle'                   // Allative   [46]
+            'na' 'n{a"}'            // Essive     [49]
+            'ksi'                   // Translative[50]
+            'ine'                   // Comitative [51]
+
+            /* Abessive and Instructive are too rare for
+               inclusion [51] */
+
+        )
+        delete  // remove the bracketed ending
+        set ending_removed  // read by stem to choose between i_plural and t_plural
+    )
+    define other_endings as (  // deletes one of the endings below, found from p2 on
+        setlimit tomark p2 for ([substring])  // search for the ending with the limit set to p2
+        among(
+            'mpi' 'mpa' 'mp{a"}'
+            'mmi' 'mma' 'mm{a"}'    // Comparative forms [85]
+                (not 'po')          //-improves things
+            'impi' 'impa' 'imp{a"}'
+            'immi' 'imma' 'imm{a"}' // Superlative forms [86]
+            'eja' 'ej{a"}'          // indicates agent [93.1B]
+        )
+        delete
+    )
+    define i_plural as (            // [26]
+        setlimit tomark p1 for ([substring])  // search for the ending with the limit set to p1
+        among(
+            'i'  'j'
+        )
+        delete  // remove the final i or j
+    )
+    define t_plural as (            // [26]
+        setlimit tomark p1 for (  // first part, with the limit set to p1
+            ['t'] test V1  // final t that is preceded by a V1 vowel
+            delete
+        )
+        setlimit tomark p2 for ([substring])  // second part, with the limit set to p2
+        among(
+            'mma' (not 'po') //-mmat endings
+            'imma'           //-immat endings
+        )
+        delete
+    )
+    define tidy as (  // final clean-up of the remaining text
+        setlimit tomark p1 for (  // the four 'do' steps run with the limit set to p1
+            do ( LONG and ([next] delete ) ) // undouble vowel
+            do ( [AEI] non-V1 delete ) // remove trailing a, a", e, i
+            do ( ['j'] 'o' or 'u' delete )  // final j preceded by o or u is removed
+            do ( ['o'] 'j' delete )  // final o preceded by j is removed
+        )
+        goto non-V1 [next] -> x  x delete // undouble consonant
+    )
+)
+
+define stem as (  // external entry point
+
+    do mark_regions  // sets p1 and p2
+    unset ending_removed  // set later by case_ending when it deletes an ending
+    backwards (
+        do particle_etc
+        do possessive
+        do case_ending
+        do other_endings
+        (ending_removed do i_plural) or do t_plural  // i_plural only after a case ending was removed
+        do tidy
+    )
+)
+
diff --git a/contrib/snowball/algorithms/french/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/french/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..e972f227f
--- /dev/null
+++ b/contrib/snowball/algorithms/french/stem_ISO_8859_1.sbl
@@ -0,0 +1,248 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           i_verb_suffix
+           verb_suffix
+           residual_suffix
+           un_double
+           un_accent
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v keep_with_s )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a^   hex 'E2'  // a-circumflex
+stringdef a`   hex 'E0'  // a-grave
+stringdef c,   hex 'E7'  // c-cedilla
+
+stringdef e"   hex 'EB'  // e-diaeresis (rare)
+stringdef e'   hex 'E9'  // e-acute
+stringdef e^   hex 'EA'  // e-circumflex
+stringdef e`   hex 'E8'  // e-grave
+stringdef i"   hex 'EF'  // i-diaeresis
+stringdef i^   hex 'EE'  // i-circumflex
+stringdef o^   hex 'F4'  // o-circumflex
+stringdef u^   hex 'FB'  // u-circumflex
+stringdef u`   hex 'F9'  // u-grave
+
+define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
+
+define prelude as repeat goto (  // rewrites certain u, i, y as upper-case markers U, I, Y
+
+    (  v [ ('u' ] v <- 'U') or  // after a vowel: u followed by a vowel -> U
+           ('i' ] v <- 'I') or  // after a vowel: i followed by a vowel -> I
+           ('y' ] <- 'Y')  // after a vowel: y -> Y
+    )
+    or
+    (  ['y'] v <- 'Y' )  // y followed by a vowel -> Y
+    or
+    (  'q' ['u'] <- 'U' )  // u after q -> U
+)
+
+define mark_regions as (  // computes pV, p1 and p2, the region starts tested by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (  // pV: the first alternative that succeeds decides the position
+        ( v v next )  // word starts with two vowels: pV after the third character
+        or
+        among ( // this exception list begun Nov 2006
+            'par'  // paris, parie, pari
+            'col'  // colis
+            'tap'  // tapis
+            // extensions possible here
+        )
+        or
+        ( next gopast v )  // otherwise: just past the first vowel after the first character
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1 = just past the first non-vowel after a vowel
+        gopast v gopast non-v setmark p2  // p2 = same search continued from p1
+    )
+)
+
+define postlude as repeat (  // turns the I, U, Y markers back into lower case
+
+    [substring] among(
+        'I' (<- 'i')
+        'U' (<- 'u')
+        'Y' (<- 'y')
+        ''  (next)  // any other character: advance; repeat ends when next fails
+    )
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or beyond mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or beyond mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or beyond mark p2
+
+    define standard_suffix as (  // handles the suffix list below; fails if nothing applies
+        [substring] among(
+
+            'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
+            'ances' 'iqUes' 'ismes' 'ables' 'istes'
+               ( R2 delete )
+            'atrice' 'ateur' 'ation'
+            'atrices' 'ateurs' 'ations'
+               ( R2 delete
+                 try ( ['ic'] (R2 delete) or <-'iqU' )  // preceding 'ic': delete in R2, else 'iqU'
+               )
+            'logie'
+            'logies'
+               ( R2 <- 'log' )
+            'usion' 'ution'
+            'usions' 'utions'
+               ( R2 <- 'u' )
+            'ence'
+            'ences'
+               ( R2 <- 'ent' )
+            'ement'
+            'ements'
+            (
+                RV delete
+                try (  // optional follow-up on what now ends the word
+                    [substring] among(
+                        'iv'   (R2 delete ['at'] R2 delete)
+                        'eus'  ((R2 delete) or (R1<-'eux'))
+                        'abl' 'iqU'
+                               (R2 delete)
+                        'i{e`}r' 'I{e`}r'      //)
+                               (RV <-'i')      //)--new 2 Sept 02
+                    )
+                )
+            )
+            'it{e'}'
+            'it{e'}s'
+            (
+                R2 delete
+                try (  // optional follow-up on what now ends the word
+                    [substring] among(
+                        'abil' ((R2 delete) or <-'abl')
+                        'ic'   ((R2 delete) or <-'iqU')
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'if' 'ive'
+            'ifs' 'ives'
+            (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
+            )
+            'eaux' (<- 'eau')
+            'aux'  (R1 <- 'al')
+            'euse'
+            'euses'((R2 delete) or (R1<-'eux'))
+
+            'issement'
+            'issements'(R1 non-v delete) // verbal
+
+            // fail(...) below forces entry to verb_suffix. -ment typically
+            // follows the p.p., e.g 'confus{e'}ment'.
+
+            'amment'   (RV fail(<- 'ant'))  // edit is kept, but the routine signals failure
+            'emment'   (RV fail(<- 'ent'))
+            'ment'
+            'ments'    (test(v RV) fail(delete))
+                       // v is e,i,u,{e'},I or U
+        )
+    )
+
+    define i_verb_suffix as setlimit tomark pV for (  // endings in i; the limit is set to pV
+        [substring] among (
+            '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
+            'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
+            'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
+            'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
+            'issez' 'issiez' 'issions' 'issons' 'it'
+                (non-v delete)  // deleted only when preceded by a non-vowel
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // other verb endings; the limit is set to pV
+        [substring] among (
+            'ions'
+                (R2 delete)  // only when in R2
+
+            '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
+            'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
+            'erons' 'eront' 'ez' 'iez'
+
+            // 'ons' //-best omitted
+
+                (delete)
+
+            '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
+            'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
+            'assions'
+                (delete
+                 try(['e'] delete)  // also remove an e directly before the deleted ending
+                )
+        )
+    )
+
+    define keep_with_s 'aiou{e`}s'  // characters after which a final s is kept
+
+    define residual_suffix as (  // fallback step used by stem when the suffix routines fail
+        try(['s'] test non-keep_with_s delete)  // drop final s unless a keep_with_s char precedes it
+        setlimit tomark pV for (  // the endings below are matched with the limit set to pV
+            [substring] among(
+                'ion'           (R2 's' or 't' delete)  // in R2 and preceded by s or t
+                'ier' 'i{e`}re'
+                'Ier' 'I{e`}re' (<-'i')
+                'e'             (delete)
+                '{e"}'          ('gu' delete)  // only when preceded by 'gu'
+            )
+        )
+    )
+
+    define un_double as (  // text ending in one of the listed strings loses its last letter
+        test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete  // test restores the cursor first
+    )
+
+    define un_accent as (  // replaces e-acute / e-grave by e when only non-vowels follow it
+        atleast 1 non-v  // skip backwards over one or more non-vowels
+        [ '{e'}' or '{e`}' ] <-'e'
+    )
+)
+
+define stem as (  // external entry point
+
+    do prelude  // U / I / Y markers
+    do mark_regions  // sets pV, p1, p2
+    backwards (
+
+        do (
+            (
+                 ( standard_suffix or  // the first routine that succeeds wins
+                   i_verb_suffix or
+                   verb_suffix
+                 )
+                 and
+                 try( [ ('Y'   ] <- 'i' ) or  // after a removal: final Y -> i
+                        ('{c,}'] <- 'c' )  // or final c-cedilla -> c
+                 )
+            ) or
+            residual_suffix  // used only when none of the three routines succeeded
+        )
+
+        // try(['ent'] RV delete) // is best omitted
+
+        do un_double
+        do un_accent
+    )
+    do postlude  // restore lower-case i, u, y
+)
+
diff --git a/contrib/snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..996eba1ab
--- /dev/null
+++ b/contrib/snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,239 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           i_verb_suffix
+           verb_suffix
+           residual_suffix
+           un_double
+           un_accent
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v keep_with_s )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a^   hex '83'  // a-circumflex
+stringdef a`   hex '85'  // a-grave
+stringdef c,   hex '87'  // c-cedilla
+
+stringdef e"   hex '89'  // e-diaeresis (rare)
+stringdef e'   hex '82'  // e-acute
+stringdef e^   hex '88'  // e-circumflex
+stringdef e`   hex '8A'  // e-grave
+stringdef i"   hex '8B'  // i-diaeresis
+stringdef i^   hex '8C'  // i-circumflex
+stringdef o^   hex '93'  // o-circumflex
+stringdef u^   hex '96'  // u-circumflex
+stringdef u`   hex '97'  // u-grave
+
+define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'  // vowel grouping: plain vowels, y, and the accented vowels defined above
+
+define prelude as repeat goto (  // upper-case u, i, y in the contexts below; postlude lower-cases them again
+
+    (  v [ ('u' ] v <- 'U') or  // 'u' between two vowels -> 'U'
+           ('i' ] v <- 'I') or  // 'i' between two vowels -> 'I'
+           ('y' ] <- 'Y')  // 'y' after a vowel -> 'Y'
+    )
+    or
+    (  ['y'] v <- 'Y' )  // 'y' before a vowel -> 'Y'
+    or
+    (  'q' ['u'] <- 'U' )  // 'u' after 'q' -> 'U'
+)
+
+define mark_regions as (  // set the marks pV, p1, p2 that the RV, R1, R2 tests compare against
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (
+        ( v v next ) or ( next gopast v )  // two leading vowels: step over a third character; otherwise skip one character and go past the next vowel
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same search repeated from p1
+    )
+)
+
+define postlude as repeat (  // lower-case every 'I', 'U', 'Y' (prelude introduces these)
+
+    [substring] among(
+        'I' (<- 'i')
+        'U' (<- 'u')
+        'Y' (<- 'y')
+        ''  (next)  // any other character: step over it
+    )
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // likewise for mark p1
+    define R2 as $p2 <= cursor  // likewise for mark p2
+
+    define standard_suffix as (  // run the action attached to the longest listed suffix that ends the word
+        [substring] among(
+
+            'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
+            'ances' 'iqUes' 'ismes' 'ables' 'istes'
+               ( R2 delete )  // delete when the suffix lies in R2
+            'atrice' 'ateur' 'ation'
+            'atrices' 'ateurs' 'ations'
+               ( R2 delete
+                 try ( ['ic'] (R2 delete) or <-'iqU' )  // a preceding 'ic' is deleted if in R2, otherwise replaced by 'iqU'
+               )
+            'logie'
+            'logies'
+               ( R2 <- 'log' )
+            'usion' 'ution'
+            'usions' 'utions'
+               ( R2 <- 'u' )
+            'ence'
+            'ences'
+               ( R2 <- 'ent' )
+            'ement'
+            'ements'
+            (
+                RV delete
+                try (  // then look at what precedes the deleted suffix
+                    [substring] among(
+                        'iv'   (R2 delete ['at'] R2 delete)  // 'iv' in R2 is deleted, then a preceding 'at' in R2
+                        'eus'  ((R2 delete) or (R1<-'eux'))  // deleted in R2, otherwise replaced by 'eux' in R1
+                        'abl' 'iqU'
+                               (R2 delete)
+                        'i{e`}r' 'I{e`}r'      //)
+                               (RV <-'i')      //)--new 2 Sept 02
+                    )
+                )
+            )
+            'it{e'}'
+            'it{e'}s'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil' ((R2 delete) or <-'abl')
+                        'ic'   ((R2 delete) or <-'iqU')
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'if' 'ive'
+            'ifs' 'ives'
+            (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )  // a preceding 'at' in R2 is deleted, then 'ic' is deleted (R2) or replaced by 'iqU'
+            )
+            'eaux' (<- 'eau')
+            'aux'  (R1 <- 'al')
+            'euse'
+            'euses'((R2 delete) or (R1<-'eux'))
+
+            'issement'
+            'issements'(R1 non-v delete) // verbal
+
+            // fail(...) below forces entry to verb_suffix. -ment typically
+            // follows the p.p., e.g. 'confus{e'}ment'.
+
+            'amment'   (RV fail(<- 'ant'))  // replace, then signal failure
+            'emment'   (RV fail(<- 'ent'))
+            'ment'
+            'ments'    (test(v RV) fail(delete))  // needs a preceding vowel lying in RV; delete, then signal failure
+                       // v is e,i,u,{e'},I or U
+        )
+    )
+
+    define i_verb_suffix as setlimit tomark pV for (  // matching is limited to the region from mark pV
+        [substring] among (
+            '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
+            'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
+            'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
+            'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
+            'issez' 'issiez' 'issions' 'issons' 'it'
+                (non-v delete)  // delete only when the suffix is preceded by a character not in v
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // matching is limited to the region from mark pV
+        [substring] among (
+            'ions'
+                (R2 delete)  // deleted only when it lies in R2
+
+            '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
+            'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
+            'erons' 'eront' 'ez' 'iez'
+
+            // 'ons' //-best omitted
+
+                (delete)
+
+            '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
+            'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
+            'assions'
+                (delete
+                 try(['e'] delete)  // also delete an 'e' immediately before the removed suffix
+                )
+        )
+    )
+
+    define keep_with_s 'aiou{e`}s'  // a final 's' is kept when the letter before it is one of these
+
+    define residual_suffix as (  // fallback used by stem when the suffix routines do not succeed
+        try(['s'] test non-keep_with_s delete)  // drop a final 's' unless the letter before it is in keep_with_s
+        setlimit tomark pV for (  // matching below is limited to the region from mark pV
+            [substring] among(
+                'ion'           (R2 's' or 't' delete)  // needs R2 and a preceding 's' or 't'; only 'ion' is deleted
+                'ier' 'i{e`}re'
+                'Ier' 'I{e`}re' (<-'i')  // the four endings on these two lines are replaced by 'i'
+                'e'             (delete)
+                '{e"}'          ('gu' delete)  // e-diaeresis is deleted only when preceded by 'gu'
+            )
+        )
+    )
+
+    define un_double as (  // shorten a doubled final consonant
+        test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete  // on a listed ending, delete its last character
+    )
+
+    define un_accent as (
+        atleast 1 non-v  // move back over one or more trailing characters that are not in v
+        [ '{e'}' or '{e`}' ] <-'e'  // an e-acute or e-grave just before them is replaced by 'e'
+    )
+)
+
+define stem as (  // external entry point: runs each step under 'do', so a failing step does not abort the rest
+
+    do prelude
+    do mark_regions
+    backwards (  // everything inside works from the end of the word
+
+        do (
+            (
+                 ( standard_suffix or  // the routines are tried in this order until one succeeds
+                   i_verb_suffix or
+                   verb_suffix
+                 )
+                 and
+                 try( [ ('Y'   ] <- 'i' ) or  // then a final 'Y' is replaced by 'i',
+                        ('{c,}'] <- 'c' )  // or a final c-cedilla by 'c'
+                 )
+            ) or
+            residual_suffix  // run only when none of the three routines above succeeded
+        )
+
+        // try(['ent'] RV delete) // is best omitted
+
+        do un_double
+        do un_accent
+    )
+    do postlude
+)
+
diff --git a/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..7069daf0d
--- /dev/null
+++ b/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl
@@ -0,0 +1,139 @@
+
+/*
+    Extra rule for -nisse ending added 11 Dec 2009
+*/
+
+routines (
+           prelude postlude
+           mark_regions
+           R1 R2
+           standard_suffix
+)
+
+externals ( stem )
+
+integers ( p1 p2 x )
+
+groupings ( v s_ending st_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef o"   hex 'F6'
+stringdef u"   hex 'FC'
+stringdef ss   hex 'DF'
+
+define v 'aeiouy{a"}{o"}{u"}'  // vowel grouping, including the three characters defined above
+
+define s_ending  'bdfghklmnrt'  // letters that must precede an 's' for standard_suffix to delete it
+define st_ending s_ending - 'r'  // s_ending without 'r'; same role for 'st'
+
+define prelude as (  // two passes over the word, both starting from the same position
+
+    test repeat (  // 'test' restores the cursor once this pass is finished
+        (
+            ['{ss}'] <- 'ss'  // the {ss} character is written as 'ss'
+        ) or next
+    )
+
+    repeat goto (
+        v [('u'] v <- 'U') or  // 'u' between two vowels -> 'U'
+           ('y'] v <- 'Y')  // 'y' between two vowels -> 'Y'
+    )
+)
+
+define mark_regions as (  // set the marks p1 and p2 that the R1 and R2 tests compare against
+
+    $p1 = limit
+    $p2 = limit
+
+    test(hop 3 setmark x)  // x = position after three characters; left unset when the word is shorter
+
+    gopast v  gopast non-v  setmark p1  // p1: just past the first non-vowel that follows a vowel
+    try($p1 < x  $p1 = x)  // at least 3
+    gopast v  gopast non-v  setmark p2  // p2: the same search repeated from there
+
+)
+
+define postlude as repeat (  // lower-case 'Y' and 'U' and replace the three {..} vowels by a, o, u
+
+    [substring] among(
+        'Y'    (<- 'y')
+        'U'    (<- 'u')
+        '{a"}' (<- 'a')
+        '{o"}' (<- 'o')
+        '{u"}' (<- 'u')
+        ''     (next)  // any other character: step over it
+    )
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // likewise for mark p2
+
+    define standard_suffix as (  // three independent steps, each wrapped in 'do' so a failure does not stop the next
+        do (
+            [substring] R1 among(  // longest listed suffix, which must start at or after p1
+                'em' 'ern' 'er'
+                (   delete
+                )
+                'e' 'en' 'es'
+                (   delete
+                    try (['s'] 'nis' delete)  // then also delete a final 's' that is preceded by 'nis'
+                )
+                's'
+                (   s_ending delete  // only when the letter before 's' is in s_ending
+                )
+            )
+        )
+        do (
+            [substring] R1 among(
+                'en' 'er' 'est'
+                (   delete
+                )
+                'st'
+                (   st_ending hop 3 delete  // needs an st_ending letter before 'st' and three more characters before that
+                )
+            )
+        )
+        do (
+            [substring] R2 among(  // longest listed suffix, which must start at or after p2
+                'end' 'ung'
+                (   delete
+                    try (['ig'] not 'e' R2 delete)  // also delete a preceding 'ig' in R2 that does not follow 'e'
+                )
+                'ig' 'ik' 'isch'
+                (   not 'e' delete  // not deleted when preceded by 'e'
+                )
+                'lich' 'heit'
+                (   delete
+                    try (
+                        ['er' or 'en'] R1 delete  // also delete a preceding 'er' or 'en' that lies in R1
+                    )
+                )
+                'keit'
+                (   delete
+                    try (
+                        [substring] R2 among(
+                            'lich' 'ig'
+                            (   delete
+                            )
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
+
+define stem as (  // external entry point
+    do prelude
+    do mark_regions
+    backwards  // the suffix step works from the end of the word
+        do standard_suffix
+    do postlude
+)
diff --git a/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..3effb3257
--- /dev/null
+++ b/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,139 @@
+
+/*
+    Extra rule for -nisse ending added 11 Dec 2009
+*/
+
+routines (
+           prelude postlude
+           mark_regions
+           R1 R2
+           standard_suffix
+)
+
+externals ( stem )
+
+integers ( p1 p2 x )
+
+groupings ( v s_ending st_ending )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a"   hex '84'
+stringdef o"   hex '94'
+stringdef u"   hex '81'
+stringdef ss   hex 'E1'
+
+define v 'aeiouy{a"}{o"}{u"}'  // vowel grouping, including the three characters defined above
+
+define s_ending  'bdfghklmnrt'  // letters that must precede an 's' for standard_suffix to delete it
+define st_ending s_ending - 'r'  // s_ending without 'r'; same role for 'st'
+
+define prelude as (  // two passes over the word, both starting from the same position
+
+    test repeat (  // 'test' restores the cursor once this pass is finished
+        (
+            ['{ss}'] <- 'ss'  // the {ss} character is written as 'ss'
+        ) or next
+    )
+
+    repeat goto (
+        v [('u'] v <- 'U') or  // 'u' between two vowels -> 'U'
+           ('y'] v <- 'Y')  // 'y' between two vowels -> 'Y'
+    )
+)
+
+define mark_regions as (  // set the marks p1 and p2 that the R1 and R2 tests compare against
+
+    $p1 = limit
+    $p2 = limit
+
+    test(hop 3 setmark x)  // x = position after three characters; left unset when the word is shorter
+
+    gopast v  gopast non-v  setmark p1  // p1: just past the first non-vowel that follows a vowel
+    try($p1 < x  $p1 = x)  // at least 3
+    gopast v  gopast non-v  setmark p2  // p2: the same search repeated from there
+
+)
+
+define postlude as repeat (  // lower-case 'Y' and 'U' and replace the three {..} vowels by a, o, u
+
+    [substring] among(
+        'Y'    (<- 'y')
+        'U'    (<- 'u')
+        '{a"}' (<- 'a')
+        '{o"}' (<- 'o')
+        '{u"}' (<- 'u')
+        ''     (next)  // any other character: step over it
+    )
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // likewise for mark p2
+
+    define standard_suffix as (  // three independent steps, each wrapped in 'do' so a failure does not stop the next
+        do (
+            [substring] R1 among(  // longest listed suffix, which must start at or after p1
+                'em' 'ern' 'er'
+                (   delete
+                )
+                'e' 'en' 'es'
+                (   delete
+                    try (['s'] 'nis' delete)  // then also delete a final 's' that is preceded by 'nis'
+                )
+                's'
+                (   s_ending delete  // only when the letter before 's' is in s_ending
+                )
+            )
+        )
+        do (
+            [substring] R1 among(
+                'en' 'er' 'est'
+                (   delete
+                )
+                'st'
+                (   st_ending hop 3 delete  // needs an st_ending letter before 'st' and three more characters before that
+                )
+            )
+        )
+        do (
+            [substring] R2 among(  // longest listed suffix, which must start at or after p2
+                'end' 'ung'
+                (   delete
+                    try (['ig'] not 'e' R2 delete)  // also delete a preceding 'ig' in R2 that does not follow 'e'
+                )
+                'ig' 'ik' 'isch'
+                (   not 'e' delete  // not deleted when preceded by 'e'
+                )
+                'lich' 'heit'
+                (   delete
+                    try (
+                        ['er' or 'en'] R1 delete  // also delete a preceding 'er' or 'en' that lies in R1
+                    )
+                )
+                'keit'
+                (   delete
+                    try (
+                        [substring] R2 among(
+                            'lich' 'ig'
+                            (   delete
+                            )
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
+
+define stem as (  // external entry point
+    do prelude
+    do mark_regions
+    backwards  // the suffix step works from the end of the word
+        do standard_suffix
+    do postlude
+)
diff --git a/contrib/snowball/algorithms/german2/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/german2/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..ce6026a86
--- /dev/null
+++ b/contrib/snowball/algorithms/german2/stem_ISO_8859_1.sbl
@@ -0,0 +1,145 @@
+
+/*
+    Extra rule for -nisse ending added 11 Dec 2009
+*/
+
+routines (
+           prelude postlude
+           mark_regions
+           R1 R2
+           standard_suffix
+)
+
+externals ( stem )
+
+integers ( p1 p2 x )
+
+groupings ( v s_ending st_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef o"   hex 'F6'
+stringdef u"   hex 'FC'
+stringdef ss   hex 'DF'
+
+define v 'aeiouy{a"}{o"}{u"}'  // vowel grouping, including the three characters defined above
+
+define s_ending  'bdfghklmnrt'  // letters that must precede an 's' for standard_suffix to delete it
+define st_ending s_ending - 'r'  // s_ending without 'r'; same role for 'st'
+
+define prelude as (  // upper-case u/y between vowels, then rewrite {ss}, ae, oe, ue
+
+    test repeat goto (  // 'test' restores the cursor for the second pass
+        v [('u'] v <- 'U') or  // 'u' between two vowels -> 'U'
+           ('y'] v <- 'Y')  // 'y' between two vowels -> 'Y'
+    )
+
+    repeat (
+        [substring] among(
+            '{ss}' (<- 'ss')
+            'ae'   (<- '{a"}')
+            'oe'   (<- '{o"}')
+            'ue'   (<- '{u"}')
+            'qu'   ()  // no action: the match has already moved past 'qu', keeping its 'u' out of a 'ue' match
+            ''     (next)  // any other character: step over it
+        )
+    )
+
+)
+
+define mark_regions as (  // set the marks p1 and p2 that the R1 and R2 tests compare against
+
+    $p1 = limit
+    $p2 = limit
+
+    test(hop 3 setmark x)  // x = position after three characters; left unset when the word is shorter
+
+    gopast v  gopast non-v  setmark p1  // p1: just past the first non-vowel that follows a vowel
+    try($p1 < x  $p1 = x)  // at least 3
+    gopast v  gopast non-v  setmark p2  // p2: the same search repeated from there
+
+)
+
+define postlude as repeat (  // lower-case 'Y' and 'U' and replace the three {..} vowels by a, o, u
+
+    [substring] among(
+        'Y'    (<- 'y')
+        'U'    (<- 'u')
+        '{a"}' (<- 'a')
+        '{o"}' (<- 'o')
+        '{u"}' (<- 'u')
+        ''     (next)  // any other character: step over it
+    )
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // likewise for mark p2
+
+    define standard_suffix as (  // three independent steps, each wrapped in 'do' so a failure does not stop the next
+        do (
+            [substring] R1 among(  // longest listed suffix, which must start at or after p1
+                'em' 'ern' 'er'
+                (   delete
+                )
+                'e' 'en' 'es'
+                (   delete
+                    try (['s'] 'nis' delete)  // then also delete a final 's' that is preceded by 'nis'
+                )
+                's'
+                (   s_ending delete  // only when the letter before 's' is in s_ending
+                )
+            )
+        )
+        do (
+            [substring] R1 among(
+                'en' 'er' 'est'
+                (   delete
+                )
+                'st'
+                (   st_ending hop 3 delete  // needs an st_ending letter before 'st' and three more characters before that
+                )
+            )
+        )
+        do (
+            [substring] R2 among(  // longest listed suffix, which must start at or after p2
+                'end' 'ung'
+                (   delete
+                    try (['ig'] not 'e' R2 delete)  // also delete a preceding 'ig' in R2 that does not follow 'e'
+                )
+                'ig' 'ik' 'isch'
+                (   not 'e' delete  // not deleted when preceded by 'e'
+                )
+                'lich' 'heit'
+                (   delete
+                    try (
+                        ['er' or 'en'] R1 delete  // also delete a preceding 'er' or 'en' that lies in R1
+                    )
+                )
+                'keit'
+                (   delete
+                    try (
+                        [substring] R2 among(
+                            'lich' 'ig'
+                            (   delete
+                            )
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
+
+define stem as (  // external entry point
+    do prelude
+    do mark_regions
+    backwards  // the suffix step works from the end of the word
+        do standard_suffix
+    do postlude
+)
diff --git a/contrib/snowball/algorithms/hungarian/stem_ISO_8859_2.sbl b/contrib/snowball/algorithms/hungarian/stem_ISO_8859_2.sbl
new file mode 100644
index 000000000..d1a644931
--- /dev/null
+++ b/contrib/snowball/algorithms/hungarian/stem_ISO_8859_2.sbl
@@ -0,0 +1,241 @@
+/*
+Hungarian Stemmer
+Removes noun inflections
+*/
+
+routines (
+    mark_regions
+    R1
+    v_ending
+    case
+    case_special
+    case_other
+    plural
+    owned
+    sing_owner
+    plur_owner
+    instrum
+    factive
+    undouble
+    double
+)
+
+externals ( stem )
+
+integers ( p1 )
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in ISO Latin 2) */
+
+stringdef a'  hex 'E1'  //a-acute
+stringdef e'  hex 'E9'  //e-acute
+stringdef i'  hex 'ED'  //i-acute
+stringdef o'  hex 'F3'  //o-acute
+stringdef o"  hex 'F6'  //o-umlaut
+stringdef oq  hex 'F5'  //o-double acute
+stringdef u'  hex 'FA'  //u-acute
+stringdef u"  hex 'FC'  //u-umlaut
+stringdef uq  hex 'FB'  //u-double acute
+
+define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'  // vowel grouping: plain vowels plus the accented vowels defined above
+
+define mark_regions as (  // set mark p1, which the R1 test compares against
+
+    $p1 = limit
+
+    (v goto non-v  // word starts with a vowel: move to the first non-vowel
+     among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next  // step over a listed letter group, or else over one character
+     setmark p1)
+    or
+
+    (non-v gopast v setmark p1)  // word starts with a non-vowel: p1 is just past the first vowel
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+
+    define v_ending as (  // replace a final {a'} or {e'} lying in R1 by the plain vowel
+        [substring] R1 among(
+            '{a'}' (<- 'a')
+            '{e'}' (<- 'e')
+        )
+    )
+
+    define double as (  // succeeds, without moving the cursor, when one of these doubled consonants precedes it
+        test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
+        'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
+    )
+
+    define undouble as (
+        next [hop 1] delete  // step back over the last character, then delete the one before it
+    )
+
+    define instrum as(  // 'al'/'el' in R1 that follows a doubled consonant accepted by double
+        [substring] R1 among(
+            'al' (double)
+            'el' (double)
+        )
+        delete  // remove the suffix
+        undouble  // then shorten the doubled consonant
+    )
+
+
+    define case as (  // delete the longest listed ending that lies in R1
+        [substring] R1 among(
+            'ban' 'ben'
+            'ba' 'be'
+            'ra' 're'
+            'nak' 'nek'
+            'val' 'vel'
+            't{o'}l' 't{oq}l'
+            'r{o'}l' 'r{oq}l'
+            'b{o'}l' 'b{oq}l'
+            'hoz' 'hez' 'h{o"}z'
+            'n{a'}l' 'n{e'}l'
+            'ig'
+            'at' 'et' 'ot' '{o"}t'
+            '{e'}rt'
+            'k{e'}pp' 'k{e'}ppen'
+            'kor'
+            'ul' '{u"}l'
+            'v{a'}' 'v{e'}'
+            'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
+            'k{e'}nt'
+            'en' 'on' 'an' '{o"}n'
+            'n'
+            't'
+        )
+        delete
+        v_ending  // then replace a now-final {a'}/{e'} by a/e; case fails if v_ending does
+    )
+
+    define case_special as(  // listed endings in R1 are replaced by the plain vowel shown
+        [substring] R1 among(
+            '{e'}n' (<- 'e')
+            '{a'}n' (<- 'a')
+            '{a'}nk{e'}nt' (<- 'a')
+        )
+    )
+
+    define case_other as(  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            'astul' 'est{u"}l' (delete)
+            'stul' 'st{u"}l' (delete)
+            '{a'}stul' (<- 'a')
+            '{e'}st{u"}l' (<- 'e')
+        )
+    )
+
+    define factive as(  // final {a'}/{e'} in R1 that follows a doubled consonant accepted by double
+        [substring] R1 among(
+            '{a'}' (double)
+            '{e'}' (double)
+        )
+        delete  // remove the vowel
+        undouble  // then shorten the doubled consonant
+    )
+
+    define plural as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            '{a'}k' (<- 'a')
+            '{e'}k' (<- 'e')
+            '{o"}k' (delete)
+            'ak' (delete)
+            'ok' (delete)
+            'ek' (delete)
+            'k' (delete)
+        )
+    )
+
+    define owned as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among (
+            'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
+            '{e'}k{e'}' (<- 'e')
+            '{a'}k{e'}' (<- 'a')
+            'k{e'}' (delete)
+            '{e'}{e'}i' (<- 'e')
+            '{a'}{e'}i' (<- 'a')
+            '{e'}i'  (delete)
+            '{e'}{e'}' (<- 'e')
+            '{e'}' (delete)
+        )
+    )
+
+    define sing_owner as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            '{u"}nk' 'unk' (delete)
+            '{a'}nk' (<- 'a')
+            '{e'}nk' (<- 'e')
+            'nk' (delete)
+            '{a'}juk' (<- 'a')
+            '{e'}j{u"}k' (<- 'e')
+            'juk' 'j{u"}k' (delete)
+            'uk' '{u"}k' (delete)
+            'em' 'om' 'am' (delete)
+            '{a'}m' (<- 'a')
+            '{e'}m' (<- 'e')
+            'm' (delete)
+            'od' 'ed' 'ad' '{o"}d' (delete)
+            '{a'}d' (<- 'a')
+            '{e'}d' (<- 'e')
+            'd' (delete)
+            'ja' 'je' (delete)
+            'a' 'e' 'o' (delete)
+            '{a'}' (<- 'a')
+            '{e'}' (<- 'e')
+        )
+    )
+
+    define plur_owner as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            'jaim' 'jeim' (delete)
+            '{a'}im' (<- 'a')
+            '{e'}im' (<- 'e')
+            'aim' 'eim' (delete)
+            'im' (delete)
+            'jaid' 'jeid' (delete)
+            '{a'}id' (<- 'a')
+            '{e'}id' (<- 'e')
+            'aid' 'eid' (delete)
+            'id' (delete)
+            'jai' 'jei' (delete)
+            '{a'}i' (<- 'a')
+            '{e'}i' (<- 'e')
+            'ai' 'ei' (delete)
+            'i' (delete)
+            'jaink' 'jeink' (delete)
+            'eink' 'aink' (delete)
+            '{a'}ink' (<- 'a')
+            '{e'}ink' (<- 'e')
+            'ink'  // no action of its own: shares the (delete) on the next line
+            'jaitok' 'jeitek' (delete)
+            'aitok' 'eitek' (delete)
+            '{a'}itok' (<- 'a')
+            '{e'}itek' (<- 'e')
+            'itek' (delete)
+            'jeik' 'jaik' (delete)
+            'aik' 'eik' (delete)
+            '{a'}ik' (<- 'a')
+            '{e'}ik' (<- 'e')
+            'ik' (delete)
+        )
+    )
+)
+
+define stem as (  // external entry point: every step runs under 'do', so a failing step does not stop the rest
+    do mark_regions
+    backwards (  // the suffix routines work from the end of the word, in this order
+      do instrum
+        do case
+        do case_special
+        do case_other
+        do factive
+        do owned
+        do sing_owner
+        do plur_owner
+        do plural
+    )
+)
diff --git a/contrib/snowball/algorithms/hungarian/stem_Unicode.sbl b/contrib/snowball/algorithms/hungarian/stem_Unicode.sbl
new file mode 100644
index 000000000..c812e055c
--- /dev/null
+++ b/contrib/snowball/algorithms/hungarian/stem_Unicode.sbl
@@ -0,0 +1,241 @@
+/*
+Hungarian Stemmer
+Removes noun inflections
+*/
+
+routines (
+    mark_regions
+    R1
+    v_ending
+    case
+    case_special
+    case_other
+    plural
+    owned
+    sing_owner
+    plur_owner
+    instrum
+    factive
+    undouble
+    double
+)
+
+externals ( stem )
+
+integers ( p1 )
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in Unicode) */
+
+stringdef a'  hex 'E1'  //a-acute
+stringdef e'  hex 'E9'  //e-acute
+stringdef i'  hex 'ED'  //i-acute
+stringdef o'  hex 'F3'  //o-acute
+stringdef o"  hex 'F6'  //o-umlaut
+stringdef oq  hex '151' //o-double acute
+stringdef u'  hex 'FA'  //u-acute
+stringdef u"  hex 'FC'  //u-umlaut
+stringdef uq  hex '171' //u-double acute
+
+define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'  // vowel grouping: plain vowels plus the accented vowels defined above
+
+define mark_regions as (  // set mark p1, which the R1 test compares against
+
+    $p1 = limit
+
+    (v goto non-v  // word starts with a vowel: move to the first non-vowel
+     among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next  // step over a listed letter group, or else over one character
+     setmark p1)
+    or
+
+    (non-v gopast v setmark p1)  // word starts with a non-vowel: p1 is just past the first vowel
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+
+    define v_ending as (  // replace a final a-acute or e-acute lying in R1 by the plain vowel
+        [substring] R1 among(
+            '{a'}' (<- 'a')
+            '{e'}' (<- 'e')
+        )
+    )
+
+    define double as (  // succeeds, without moving the cursor, when one of these doubled consonants precedes it
+        test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
+        'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
+    )
+
+    define undouble as (
+        next [hop 1] delete  // step back over the last character, then delete the one before it
+    )
+
+    define instrum as(  // 'al'/'el' in R1 that follows a doubled consonant accepted by double
+        [substring] R1 among(
+            'al' (double)
+            'el' (double)
+        )
+        delete  // remove the suffix
+        undouble  // then shorten the doubled consonant
+    )
+
+
+    define case as (  // delete the longest listed ending that lies in R1
+        [substring] R1 among(
+            'ban' 'ben'
+            'ba' 'be'
+            'ra' 're'
+            'nak' 'nek'
+            'val' 'vel'
+            't{o'}l' 't{oq}l'
+            'r{o'}l' 'r{oq}l'
+            'b{o'}l' 'b{oq}l'
+            'hoz' 'hez' 'h{o"}z'
+            'n{a'}l' 'n{e'}l'
+            'ig'
+            'at' 'et' 'ot' '{o"}t'
+            '{e'}rt'
+            'k{e'}pp' 'k{e'}ppen'
+            'kor'
+            'ul' '{u"}l'
+            'v{a'}' 'v{e'}'
+            'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
+            'k{e'}nt'
+            'en' 'on' 'an' '{o"}n'
+            'n'
+            't'
+        )
+        delete
+        v_ending  // then replace a now-final a-acute/e-acute by a/e; case fails if v_ending does
+    )
+
+    define case_special as(  // listed endings in R1 are replaced by the plain vowel shown
+        [substring] R1 among(
+            '{e'}n' (<- 'e')
+            '{a'}n' (<- 'a')
+            '{a'}nk{e'}nt' (<- 'a')
+        )
+    )
+
+    define case_other as(  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            'astul' 'est{u"}l' (delete)
+            'stul' 'st{u"}l' (delete)
+            '{a'}stul' (<- 'a')
+            '{e'}st{u"}l' (<- 'e')
+        )
+    )
+
+    define factive as(  // final a-acute/e-acute in R1 that follows a doubled consonant accepted by double
+        [substring] R1 among(
+            '{a'}' (double)
+            '{e'}' (double)
+        )
+        delete  // remove the vowel
+        undouble  // then shorten the doubled consonant
+    )
+
+    define plural as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            '{a'}k' (<- 'a')
+            '{e'}k' (<- 'e')
+            '{o"}k' (delete)
+            'ak' (delete)
+            'ok' (delete)
+            'ek' (delete)
+            'k' (delete)
+        )
+    )
+
+    define owned as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among (
+            'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
+            '{e'}k{e'}' (<- 'e')
+            '{a'}k{e'}' (<- 'a')
+            'k{e'}' (delete)
+            '{e'}{e'}i' (<- 'e')
+            '{a'}{e'}i' (<- 'a')
+            '{e'}i'  (delete)
+            '{e'}{e'}' (<- 'e')
+            '{e'}' (delete)
+        )
+    )
+
+    define sing_owner as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            '{u"}nk' 'unk' (delete)
+            '{a'}nk' (<- 'a')
+            '{e'}nk' (<- 'e')
+            'nk' (delete)
+            '{a'}juk' (<- 'a')
+            '{e'}j{u"}k' (<- 'e')
+            'juk' 'j{u"}k' (delete)
+            'uk' '{u"}k' (delete)
+            'em' 'om' 'am' (delete)
+            '{a'}m' (<- 'a')
+            '{e'}m' (<- 'e')
+            'm' (delete)
+            'od' 'ed' 'ad' '{o"}d' (delete)
+            '{a'}d' (<- 'a')
+            '{e'}d' (<- 'e')
+            'd' (delete)
+            'ja' 'je' (delete)
+            'a' 'e' 'o' (delete)
+            '{a'}' (<- 'a')
+            '{e'}' (<- 'e')
+        )
+    )
+
+    define plur_owner as (  // longest listed ending in R1: deleted, or replaced by the plain vowel shown
+        [substring] R1 among(
+            'jaim' 'jeim' (delete)
+            '{a'}im' (<- 'a')
+            '{e'}im' (<- 'e')
+            'aim' 'eim' (delete)
+            'im' (delete)
+            'jaid' 'jeid' (delete)
+            '{a'}id' (<- 'a')
+            '{e'}id' (<- 'e')
+            'aid' 'eid' (delete)
+            'id' (delete)
+            'jai' 'jei' (delete)
+            '{a'}i' (<- 'a')
+            '{e'}i' (<- 'e')
+            'ai' 'ei' (delete)
+            'i' (delete)
+            'jaink' 'jeink' (delete)
+            'eink' 'aink' (delete)
+            '{a'}ink' (<- 'a')
+            '{e'}ink' (<- 'e')
+            'ink'  // no action of its own: shares the (delete) on the next line
+            'jaitok' 'jeitek' (delete)
+            'aitok' 'eitek' (delete)
+            '{a'}itok' (<- 'a')
+            '{e'}itek' (<- 'e')
+            'itek' (delete)
+            'jeik' 'jaik' (delete)
+            'aik' 'eik' (delete)
+            '{a'}ik' (<- 'a')
+            '{e'}ik' (<- 'e')
+            'ik' (delete)
+        )
+    )
+)
+
+define stem as (  // external entry point: every step runs under 'do', so a failing step does not stop the rest
+    do mark_regions
+    backwards (  // the suffix routines work from the end of the word, in this order
+      do instrum
+        do case
+        do case_special
+        do case_other
+        do factive
+        do owned
+        do sing_owner
+        do plur_owner
+        do plural
+    )
+)
diff --git a/contrib/snowball/algorithms/italian/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/italian/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..8d25cf64f
--- /dev/null
+++ b/contrib/snowball/algorithms/italian/stem_ISO_8859_1.sbl
@@ -0,0 +1,195 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v AEIO CG )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a'   hex 'E1'
+stringdef a`   hex 'E0'
+stringdef e'   hex 'E9'
+stringdef e`   hex 'E8'
+stringdef i'   hex 'ED'
+stringdef i`   hex 'EC'
+stringdef o'   hex 'F3'
+stringdef o`   hex 'F2'
+stringdef u'   hex 'FA'
+stringdef u`   hex 'F9'
+
+define v 'aeiou{a`}{e`}{i`}{o`}{u`}'  // vowel grouping; only the {x`} forms are listed because prelude rewrites each {x'} to {x`}
+
+define prelude as (  // two passes over the word, both starting from the same position
+    test repeat (  // 'test' restores the cursor once this pass is finished
+        [substring] among(
+            '{a'}' (<- '{a`}')  // each {x'} character is replaced by the matching {x`} character
+            '{e'}' (<- '{e`}')
+            '{i'}' (<- '{i`}')
+            '{o'}' (<- '{o`}')
+            '{u'}' (<- '{u`}')
+            'qu'   (<- 'qU')  // 'u' after 'q' is upper-cased
+            ''     (next)  // any other character: step over it
+        )
+    )
+    repeat goto (
+        v [ ('u' ] v <- 'U') or  // 'u' between two vowels -> 'U'
+            ('i' ] v <- 'I')  // 'i' between two vowels -> 'I'
+    )
+)
+
+define mark_regions as (  // set the marks pV, p1, p2 that the RV, R1, R2 tests compare against
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: vowel + non-vowel -> past the next vowel; two vowels -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: two non-vowels -> past the next vowel; non-vowel + vowel -> one character further
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same search repeated from p1
+    )
+)
+
+define postlude as repeat (  // lower-case every 'I' and 'U' (prelude introduces these)
+
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)  // any other character: step over it
+    )
+
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // likewise for mark p1
+    define R2 as $p2 <= cursor  // likewise for mark p2
+
+    define attached_pronoun as (  // a listed final pronoun is handled only when one of the endings in the second among precedes it
+        [substring] among(  // marks the longest listed pronoun as the slice; no actions are attached here
+            'ci' 'gli' 'la' 'le' 'li' 'lo'
+            'mi' 'ne' 'si'  'ti' 'vi'
+            // the compound forms are:
+            'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
+            'mela' 'mele' 'meli' 'melo' 'mene'
+            'tela' 'tele' 'teli' 'telo' 'tene'
+            'cela' 'cele' 'celi' 'celo' 'cene'
+            'vela' 'vele' 'veli' 'velo' 'vene'
+        )
+        among( (RV)  // the leading (RV) is run before whichever action below is selected
+            'ando' 'endo'   (delete)  // the pronoun is deleted
+            'ar' 'er' 'ir'  (<- 'e')  // the pronoun is replaced by 'e'
+        )
+    )
+
+    define standard_suffix as (  // run the action attached to the longest listed suffix that ends the word
+        [substring] among(
+
+            'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
+            'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
+            'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
+            'atrice' 'atrici'
+            'ante' 'anti' // Note 1
+               ( R2 delete )  // delete when the suffix lies in R2
+            'azione' 'azioni' 'atore' 'atori'
+               ( R2 delete
+                 try ( ['ic'] R2 delete )  // also delete a preceding 'ic' that lies in R2
+               )
+            'logia' 'logie'
+               ( R2 <- 'log' )
+            'uzione' 'uzioni' 'usione' 'usioni'
+               ( R2 <- 'u' )
+            'enza' 'enze'
+               ( R2 <- 'ente' )
+            'amento' 'amenti' 'imento' 'imenti'
+               ( RV delete )
+            'amente' (
+                R1 delete
+                try (
+                    [substring] R2 delete among(  // a preceding listed ending in R2 is deleted before its action runs
+                        'iv' ( ['at'] R2 delete )  // after 'iv', also delete a preceding 'at' in R2
+                        'os' 'ic' 'abil'  // no further action for these three
+                    )
+                )
+            )
+            'it{a`}' (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil' 'ic' 'iv' (R2 delete)
+                    )
+                )
+            )
+            'ivo' 'ivi' 'iva' 'ive' (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] R2 delete )  // a preceding 'at' in R2 is deleted, then a preceding 'ic' in R2
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // matching is limited to the region from mark pV
+        [substring] among(
+            'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
+            'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
+            'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
+            'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
+            'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
+            'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'  // NOTE(review): prelude here only introduces 'U' and 'I', so 'Yamo' needs a 'Y' already in the input
+            'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
+            'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
+            'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
+            'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
+            'ono' 'uta' 'ute' 'uti' 'uto'
+
+            'ar' 'ir' // but 'er' is problematical
+                (delete)  // single action shared by every ending listed above
+        )
+    )
+
+    define AEIO 'aeio{a`}{e`}{i`}{o`}'  // final letters that vowel_suffix may delete
+    define CG 'cg'  // letters that must precede an 'h' for vowel_suffix to delete it
+
+    define vowel_suffix as (  // both steps are under 'try', so the routine always succeeds
+        try (
+            [AEIO] RV delete  // delete a final AEIO letter that lies in RV
+            ['i'] RV delete  // then delete a preceding 'i' that lies in RV
+        )
+        try (
+            ['h'] CG RV delete  // delete a final 'h' that follows 'c' or 'g', subject to the RV test
+        )
+    )
+)
+
+define stem as (  // external entry point: every step runs under 'do', so a failing step does not stop the rest
+    do prelude
+    do mark_regions
+    backwards (  // the steps inside work from the end of the word
+        do attached_pronoun
+        do (standard_suffix or verb_suffix)  // verb_suffix is tried only when standard_suffix fails
+        do vowel_suffix
+    )
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
+
diff --git a/contrib/snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..b43295c09
--- /dev/null
+++ b/contrib/snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,195 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v AEIO CG )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a'   hex 'A0'
+stringdef a`   hex '85'
+stringdef e'   hex '82'
+stringdef e`   hex '8A'
+stringdef i'   hex 'A1'
+stringdef i`   hex '8D'
+stringdef o'   hex 'A2'
+stringdef o`   hex '95'
+stringdef u'   hex 'A3'
+stringdef u`   hex '97'
+
+define v 'aeiou{a`}{e`}{i`}{o`}{u`}'
+
+define prelude as (  // normalise accents and mark 'u'/'i' that should not count as vowels
+    test repeat (  // walk the whole word; 'test' puts the cursor back at the start afterwards
+        [substring] among(
+            '{a'}' (<- '{a`}')  // acute-accented vowels are rewritten to the grave forms
+            '{e'}' (<- '{e`}')
+            '{i'}' (<- '{i`}')
+            '{o'}' (<- '{o`}')
+            '{u'}' (<- '{u`}')
+            'qu'   (<- 'qU')  // 'u' after 'q' is upper-cased, taking it out of grouping v
+            ''     (next)  // nothing matched: step over one character
+        )
+    )
+    repeat goto (
+        v [ ('u' ] v <- 'U') or  // 'u' between two vowels becomes 'U'
+            ('i' ] v <- 'I')  // 'i' between two vowels becomes 'I'
+    )
+)
+
+define mark_regions as (  // compute the marks pV, p1 and p2 with a forward scan
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+
+    do (  // pV depends on the vowel/non-vowel pattern of the first two letters
+        ( v (non-v gopast v) or (v gopast non-v) )  // word begins with a vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // word begins with a non-vowel
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same rule applied again, starting from p1
+    )
+)
+
+define postlude as repeat (  // undo the upper-case markers introduced by prelude
+
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)  // any other character: step over it
+    )
+
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define attached_pronoun as (  // handle a pronoun attached to the end of a verb form
+        [substring] among(  // the matched pronoun becomes the slice
+            'ci' 'gli' 'la' 'le' 'li' 'lo'
+            'mi' 'ne' 'si'  'ti' 'vi'
+            // the compound forms are:
+            'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
+            'mela' 'mele' 'meli' 'melo' 'mene'
+            'tela' 'tele' 'teli' 'telo' 'tene'
+            'cela' 'cele' 'celi' 'celo' 'cene'
+            'vela' 'vele' 'veli' 'velo' 'vene'
+        )
+        among( (RV)  // what precedes the pronoun decides the action; RV is tested before each action
+            'ando' 'endo'   (delete)  // the pronoun (the slice) is deleted
+            'ar' 'er' 'ir'  (<- 'e')  // the pronoun (the slice) is replaced by 'e'
+        )
+    )
+
+    define standard_suffix as (  // standard suffix removal; the longest matching suffix selects the action
+        [substring] among(
+
+            'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
+            'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
+            'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
+            'atrice' 'atrici'
+            'ante' 'anti' // Note 1
+               ( R2 delete )  // delete when the R2 test passes
+            'azione' 'azioni' 'atore' 'atori'
+               ( R2 delete
+                 try ( ['ic'] R2 delete )  // optionally also drop a preceding 'ic'
+               )
+            'logia' 'logie'
+               ( R2 <- 'log' )
+            'uzione' 'uzioni' 'usione' 'usioni'
+               ( R2 <- 'u' )
+            'enza' 'enze'
+               ( R2 <- 'ente' )
+            'amento' 'amenti' 'imento' 'imenti'
+               ( RV delete )  // this group is tested against RV, not R2
+            'amente' (
+                R1 delete  // this suffix is tested against R1
+                try (
+                    [substring] R2 delete among(  // a further listed ending is deleted if R2 passes
+                        'iv' ( ['at'] R2 delete )  // after 'iv', also a preceding 'at'
+                        'os' 'ic' 'abil'
+                    )
+                )
+            )
+            'it{a`}' (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil' 'ic' 'iv' (R2 delete)
+                    )
+                )
+            )
+            'ivo' 'ivi' 'iva' 'ive' (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] R2 delete )  // 'ic' is attempted only after 'at' was removed
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // verb endings; the backward search is limited at mark pV
+        [substring] among(  // the longest listed ending found becomes the slice
+            'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
+            'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
+            'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
+            'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
+            'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
+            'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'  // NOTE(review): prelude only introduces 'U' and 'I'; confirm 'Yamo' can ever match
+            'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
+            'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
+            'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
+            'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
+            'ono' 'uta' 'ute' 'uti' 'uto'
+
+            'ar' 'ir' // but 'er' is problematical
+                (delete)  // single shared action: remove the matched ending
+        )
+    )
+
+    define AEIO 'aeio{a`}{e`}{i`}{o`}'  // letters that vowel_suffix may remove ('u' is not in the set)
+    define CG 'cg'  // letters that must precede a removable final 'h'
+
+    define vowel_suffix as (  // final clean-up; both parts are optional (try)
+        try (
+            [AEIO] RV delete  // delete a final AEIO letter when the RV test passes
+            ['i'] RV delete  // then a preceding 'i', again subject to RV
+        )
+        try (
+            ['h'] CG RV delete  // delete a final 'h' that is preceded by 'c' or 'g'
+        )
+    )
+)
+
+define stem as (  // external entry point; every stage runs under 'do', so a failing stage does not stop the rest
+    do prelude
+    do mark_regions
+    backwards (  // suffix stages scan from the end of the word
+        do attached_pronoun
+        do (standard_suffix or verb_suffix)  // verb_suffix is tried only when standard_suffix fails
+        do vowel_suffix
+    )
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
+
diff --git a/contrib/snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..cd79d1280
--- /dev/null
+++ b/contrib/snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
@@ -0,0 +1,245 @@
+strings ( ch )
+integers ( x p1 p2 )
+booleans ( Y_found stemmed GE_removed )
+
+routines (
+
+   R1 R2
+   C V VX
+   lengthen_V
+   Step_1 Step_2 Step_3 Step_4 Step_7
+   Step_6 Step_1c
+   Lose_prefix
+   Lose_infix
+   measure
+)
+
+externals ( stem )
+
+groupings ( v v_WX AOU AIOU )
+
+stringescapes {}
+
+stringdef '   hex '27'  // yuk
+
+define v        'aeiouy'
+define v_WX     v + 'wx'
+define AOU      'aou'
+define AIOU     'aiou'
+
+backwardmode (
+
+    define R1 as (setmark x $x >= p1)  // records the cursor in x, then succeeds if it is at or after p1
+    define R2 as (setmark x $x >= p2)  // same test against p2
+
+    define V  as test (v or 'ij')  // vowel (or 'ij') at the cursor; cursor is not moved
+    define VX as test (next v or 'ij')  // same check after skipping one character
+    define C  as test (not 'ij' non-v)  // non-vowel at the cursor that is not part of 'ij'
+
+    define lengthen_V as do (  // doubles a vowel; wrapped in 'do', so it never fails the caller
+        non-v_WX [ (AOU] test (non-v or atlimit)) or  // 'a', 'o' or 'u' with a non-vowel (or nothing) before it
+                   ('e'] test (non-v or atlimit  // or 'e', with the extra exclusions below
+                               not AIOU
+                               not (next AIOU non-v)))
+        ->ch insert ch  // copy the sliced vowel into ch and insert that copy
+    )
+
+    define Step_1 as  // endings 's, s, ies, es, aus, en, nde
+    (
+        [among ( (])  // the leading (]) marks the slice right after the match, before each action
+
+            '{'}s' (delete)
+            's'    (R1 not ('t' R1) C delete)  // not after 't' that is itself in R1
+            'ies'  (R1 <-'ie')
+            'es'
+                   (('ar' R1 C ] delete lengthen_V) or  // ']' is set again so the slice also covers 'ar'
+                    ('er' R1 C ] delete) or
+                    (R1 C <-'e'))
+
+            'aus'  (R1 V <-'au')
+            'en'   (('hed' R1 ] <-'heid') or  // alternatives are tried in the order written
+                    ('nd' delete) or
+                    ('d' R1 C ] delete) or
+                    ('i' or 'j' V delete) or
+                    (R1 C delete lengthen_V))
+            'nde'  (<-'nd')
+        )
+    )
+
+    define Step_2 as  // endings in '-e' ('je', 'ge', 'lijke', ... 'ieve')
+    (
+        [among ( (])  // the leading (]) marks the slice right after the match, before each action
+            'je'   (('{'}t' ] delete) or  // each alternative re-marks the slice with ']' after matching more text
+                    ('et'   ] R1 C delete) or
+                    ('rnt'  ] <-'rn') or
+                    ('t'    ] R1 VX delete) or
+                    ('ink'  ] <-'ing') or
+                    ('mp'   ] <-'m') or
+                    ('{'}'  ] R1 delete) or
+                    (] R1 C delete))  // fallback: only 'je' itself is in the slice
+            'ge'   (R1 <-'g')
+            'lijke'(R1 <-'lijk')
+            'ische'(R1 <-'isch')
+            'de'   (R1 C delete)
+            'te'   (R1 <-'t')
+            'se'   (R1 <-'s')
+            're'   (R1 <-'r')
+            'le'   (R1 delete attach 'l' lengthen_V)  // delete, put 'l' back, then try vowel doubling
+            'ene'  (R1 C delete attach 'en' lengthen_V)
+            'ieve' (R1 C <-'ief')
+        )
+    )
+
+    define Step_3 as  // derivational endings ('atie', 'iteit', 'heid', ... 'dst')
+    (
+        [among ( (])  // the leading (]) marks the slice right after the match, before each action
+            'atie'  (R1 <-'eer')
+            'iteit' (R1 delete lengthen_V)
+            'heid'  // strings without their own action share the next action listed
+            'sel'
+            'ster'  (R1 delete)
+            'rder'  (<-'r')  // no region test for this ending
+            'ing'
+            'isme'
+            'erij'  (R1 delete lengthen_V)
+            'arij'  (R1 C <-'aar')
+            'fie'   (R2 delete attach 'f' lengthen_V)  // these two use R2 rather than R1
+            'gie'   (R2 delete attach 'g' lengthen_V)
+            'tst'   (R1 C <-'t')
+            'dst'   (R1 C <-'d')
+        )
+    )
+
+    define Step_4 as  // two suffix tables; the second is tried only if the first fails
+    (
+        (   [among ( (])  // the leading (]) marks the slice right after the match, before each action
+                'ioneel'  (R1 <-'ie')
+                'atief'   (R1 <-'eer')
+                'baar'    (R1 delete)
+                'naar'    (R1 V <-'n')
+                'laar'    (R1 V <-'l')
+                'raar'    (R1 V <-'r')
+                'tant'    (R1 <-'teer')
+                'lijker'
+                'lijkst'  (R1 <-'lijk')
+                'achtig'
+                'achtiger'
+                'achtigst'(R1 delete)
+                'eriger'
+                'erigst'
+                'erig'
+                'end'     (R1 C delete lengthen_V)
+            )
+        )
+        or
+        (   [among ( (])  // shorter 'ig' endings
+                'iger'
+                'igst'
+                'ig'      (R1 C delete lengthen_V)
+            )
+        )
+    )
+
+    define Step_7 as  // drop a final 't' after 'k', 'f' or 'p'
+    (
+        [among ( (])  // the leading (]) marks the slice right after the match, before each action
+            'kt'   (<-'k')
+            'ft'   (<-'f')
+            'pt'   (<-'p')
+        )
+    )
+
+    define Step_6 as  // undouble a final double consonant; also final 'v' -> 'f' and 'z' -> 's'
+    (
+        [among ( (])  // the leading (]) marks the slice right after the match, before each action
+            'bb'   (<-'b')
+            'cc'   (<-'c')
+            'dd'   (<-'d')
+            'ff'   (<-'f')
+            'gg'   (<-'g')
+            'hh'   (<-'h')
+            'jj'   (<-'j')
+            'kk'   (<-'k')
+            'll'   (<-'l')
+            'mm'   (<-'m')
+            'nn'   (<-'n')
+            'pp'   (<-'p')
+            'qq'   (<-'q')
+            'rr'   (<-'r')
+            'ss'   (<-'s')
+            'tt'   (<-'t')
+            'vv'   (<-'v')
+            'ww'   (<-'w')
+            'xx'   (<-'x')
+            'zz'   (<-'z')
+            'v'    (<-'f')  // single final 'v' is rewritten, not deleted
+            'z'    (<-'s')  // single final 'z' is rewritten, not deleted
+        )
+    )
+
+    define Step_1c as  // remove a final 'd' or 't'; run by stem only when GE_removed is set
+    (
+        [among ( (] R1 C)  // shared prefix for both actions: mark the slice, then require R1 and C
+            'd' (not ('n' R1) delete)  // keep the 'd' when preceded by 'n' in R1
+            't' (not ('h' R1) delete)  // keep the 't' when preceded by 'h' in R1
+        )
+    )
+)
+
+define Lose_prefix as (  // delete 'ge' at the cursor (forward mode) and flag it
+    ['ge'] test hop 3 (goto v goto non-v)  // needs 3 more characters, and a vowel then a non-vowel after 'ge'
+    set GE_removed
+    delete
+)
+
+define Lose_infix as (  // delete the first 'ge' found after the first character and flag it
+    next  // skip one character so a 'ge' at the very start is not considered
+    gopast (['ge']) test hop 3 (goto v goto non-v)  // same follow-up conditions as Lose_prefix
+    set GE_removed
+    delete
+)
+
+define measure as (  // (re)compute marks p1 and p2
+    do (  // defaults: both marks at the limit
+        tolimit
+        setmark p1
+        setmark p2
+    )
+    do(
+        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p1  // skip non-vowels, one or more vowels/'ij', one non-vowel
+        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p2  // same pattern again, continuing from p1
+    )
+
+)
+define stem as (  // external entry point
+
+    unset Y_found
+    unset stemmed
+    do ( ['y'] <-'Y' set Y_found )  // initial 'y' becomes 'Y'
+    do repeat(goto (v  ['y'])<-'Y' set Y_found )  // 'y' after a vowel becomes 'Y'
+
+    measure
+
+    backwards (
+            do (Step_1 set stemmed )  // 'stemmed' records that a step succeeded
+            do (Step_2 set stemmed )
+            do (Step_3 set stemmed )
+            do (Step_4 set stemmed )
+    )
+    unset GE_removed
+    do (Lose_prefix and measure)  // marks are recomputed only if a 'ge' prefix was removed
+    backwards (
+            do (GE_removed Step_1c)  // Step_1c runs only after a 'ge' removal
+        )
+    unset GE_removed
+    do (Lose_infix and measure)
+    backwards (
+            do (GE_removed Step_1c)
+        )
+    backwards (
+            do (Step_7 set stemmed )
+            do (stemmed or GE_removed Step_6)  // Step_6 needs an earlier change of either kind
+        )
+    do(Y_found  repeat(goto (['Y']) <-'y'))  // turn every 'Y' back into 'y'
+)
+
diff --git a/contrib/snowball/algorithms/lovins/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/lovins/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..3f69f1572
--- /dev/null
+++ b/contrib/snowball/algorithms/lovins/stem_ISO_8859_1.sbl
@@ -0,0 +1,208 @@
+
+stringescapes {}
+
+routines (
+   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC
+
+   endings
+
+   undouble respell
+)
+
+externals ( stem )
+
+backwardmode (
+
+  /* Lovins' conditions A, B ... CC, as given in her Appendix B, where
+     a test for a two letter prefix ('test hop 2') is implicitly
+     assumed. Note that 'e' next 'u' corresponds to her u*e because
+     Snowball is scanning backwards. */
+
+  define A  as ( hop 2 )  // at least 2 characters must precede the ending
+  define B  as ( hop 3 )  // at least 3
+  define C  as ( hop 4 )  // at least 4
+  define D  as ( hop 5 )  // at least 5
+  define E  as ( test hop 2 not 'e' )  // 2 characters, and the ending does not follow 'e'
+  define F  as ( test hop 3 not 'e' )
+  define G  as ( test hop 3 'f' )  // 3 characters, and the ending follows 'f'
+  define H  as ( test hop 2 't' or 'll' )
+  define I  as ( test hop 2 not 'o' not 'e' )
+  define J  as ( test hop 2 not 'a' not 'e' )
+  define K  as ( test hop 3 'l' or 'i' or ('e' next 'u') )
+  define L  as ( test hop 2 not 'u' not 'x' not ('s' not 'o') )
+  define M  as ( test hop 2 not 'a' not 'c' not 'e' not 'm' )
+  define N  as ( test hop 3 ( hop 2 not 's' or hop 2 ) )
+  define O  as ( test hop 2 'l' or 'i' )
+  define P  as ( test hop 2 not 'c' )
+  define Q  as ( test hop 2 test hop 3 not 'l' not 'n' )
+  define R  as ( test hop 2 'n' or 'r' )
+  define S  as ( test hop 2 'dr' or ('t' not 't') )
+  define T  as ( test hop 2 's' or ('t' not 'o') )
+  define U  as ( test hop 2 'l' or 'm' or 'n' or 'r' )
+  define V  as ( test hop 2 'c' )
+  define W  as ( test hop 2 not 's' not 'u' )
+  define X  as ( test hop 2 'l' or 'i' or ('e' next 'u') )  // same letter test as K, with hop 2 instead of hop 3
+  define Y  as ( test hop 2 'in' )
+  define Z  as ( test hop 2 not 'f' )
+  define AA as ( test hop 2 among ( 'd' 'f' 'ph' 'th' 'l' 'er' 'or'
+                                    'es' 't' ) )
+  define BB as ( test hop 3 not 'met' not 'ryst' )
+  define CC as ( test hop 2 'l' )
+
+
+  /* The system of endings, as given in Appendix A. */
+
+  define endings as (  // remove the longest listed ending whose condition routine succeeds
+    [substring] among(  // each string is followed by the name of its condition routine (A .. CC)
+    'alistically' B 'arizability' A 'izationally' B
+
+     'antialness' A  'arisations' A  'arizations' A  'entialness' A
+
+      'allically' C   'antaneous' A   'antiality' A   'arisation' A
+      'arization' A   'ationally' B   'ativeness' A   'eableness' E
+      'entations' A   'entiality' A   'entialize' A   'entiation' A
+      'ionalness' A   'istically' A   'itousness' A   'izability' A
+      'izational' A
+
+       'ableness' A    'arizable' A    'entation' A    'entially' A
+       'eousness' A    'ibleness' A    'icalness' A    'ionalism' A
+       'ionality' A    'ionalize' A    'iousness' A    'izations' A
+       'lessness' A
+
+        'ability' A     'aically' A     'alistic' B     'alities' A
+        'ariness' E     'aristic' A     'arizing' A     'ateness' A
+        'atingly' A     'ational' B     'atively' A     'ativism' A
+        'elihood' E     'encible' A     'entally' A     'entials' A
+        'entiate' A     'entness' A     'fulness' A     'ibility' A
+        'icalism' A     'icalist' A     'icality' A     'icalize' A
+        'ication' G     'icianry' A     'ination' A     'ingness' A
+        'ionally' A     'isation' A     'ishness' A     'istical' A
+        'iteness' A     'iveness' A     'ivistic' A     'ivities' A
+        'ization' F     'izement' A     'oidally' A     'ousness' A
+
+         'aceous' A      'acious' B      'action' G      'alness' A
+         'ancial' A      'ancies' A      'ancing' B      'ariser' A
+         'arized' A      'arizer' A      'atable' A      'ations' B
+         'atives' A      'eature' Z      'efully' A      'encies' A
+         'encing' A      'ential' A      'enting' C      'entist' A
+         'eously' A      'ialist' A      'iality' A      'ialize' A
+         'ically' A      'icance' A      'icians' A      'icists' A
+         'ifully' A      'ionals' A      'ionate' D      'ioning' A
+         'ionist' A      'iously' A      'istics' A      'izable' E
+         'lessly' A      'nesses' A      'oidism' A
+
+          'acies' A       'acity' A       'aging' B       'aical' A
+          'alist' A       'alism' B       'ality' A       'alize' A
+          'allic'BB       'anced' B       'ances' B       'antic' C
+          'arial' A       'aries' A       'arily' A       'arity' B
+          'arize' A       'aroid' A       'ately' A       'ating' I
+          'ation' B       'ative' A       'ators' A       'atory' A
+          'ature' E       'early' Y       'ehood' A       'eless' A
+          'elity' A       'ement' A       'enced' A       'ences' A
+          'eness' E       'ening' E       'ental' A       'ented' C
+          'ently' A       'fully' A       'ially' A       'icant' A
+          'ician' A       'icide' A       'icism' A       'icist' A
+          'icity' A       'idine' I       'iedly' A       'ihood' A
+          'inate' A       'iness' A       'ingly' B       'inism' J
+          'inity'CC       'ional' A       'ioned' A       'ished' A
+          'istic' A       'ities' A       'itous' A       'ively' A
+          'ivity' A       'izers' F       'izing' F       'oidal' A
+          'oides' A       'otide' A       'ously' A
+
+           'able' A        'ably' A        'ages' B        'ally' B
+           'ance' B        'ancy' B        'ants' B        'aric' A
+           'arly' K        'ated' I        'ates' A        'atic' B
+           'ator' A        'ealy' Y        'edly' E        'eful' A
+           'eity' A        'ence' A        'ency' A        'ened' E
+           'enly' E        'eous' A        'hood' A        'ials' A
+           'ians' A        'ible' A        'ibly' A        'ical' A
+           'ides' L        'iers' A        'iful' A        'ines' M
+           'ings' N        'ions' B        'ious' A        'isms' B
+           'ists' A        'itic' H        'ized' F        'izer' F
+           'less' A        'lily' A        'ness' A        'ogen' A
+           'ward' A        'wise' A        'ying' B        'yish' A
+
+            'acy' A         'age' B         'aic' A         'als'BB
+            'ant' B         'ars' O         'ary' F         'ata' A
+            'ate' A         'eal' Y         'ear' Y         'ely' E
+            'ene' E         'ent' C         'ery' E         'ese' A
+            'ful' A         'ial' A         'ian' A         'ics' A
+            'ide' L         'ied' A         'ier' A         'ies' P
+            'ily' A         'ine' M         'ing' N         'ion' Q
+            'ish' C         'ism' B         'ist' A         'ite'AA
+            'ity' A         'ium' A         'ive' A         'ize' F
+            'oid' A         'one' R         'ous' A
+
+             'ae' A          'al'BB          'ar' X          'as' B
+             'ed' E          'en' F          'es' E          'ia' A
+             'ic' A          'is' A          'ly' B          'on' S
+             'or' T          'um' U          'us' V          'yl' R
+           '{'}s' A        's{'}' A
+
+              'a' A           'e' A           'i' A           'o' A
+              's' W           'y' B
+
+        (delete)  // the only action in the table: remove the matched ending
+    )
+  )
+
+  /* Undoubling is rule 1 of appendix C. */
+
+  define undouble as (  // reduce a final double consonant to a single letter
+    test substring among ('bb' 'dd' 'gg' 'll' 'mm' 'nn' 'pp' 'rr' 'ss'  // 'test' leaves the cursor at the end of the word
+                          'tt')
+    [next] delete  // slice exactly one character and remove it
+  )
+
+  /* The other appendix C rules can be done together. */
+
+  define respell as (  // rewrite the stem ending; the longest listed ending is the one replaced
+    [substring] among (
+      'iev'  (<-'ief')
+      'uct'  (<-'uc')
+      'umpt' (<-'um')
+      'rpt'  (<-'rb')
+      'urs'  (<-'ur')
+      'istr' (<-'ister')
+      'metr' (<-'meter')
+      'olv'  (<-'olut')
+      'ul'   (not 'a' not 'i' not 'o' <-'l')  // only when not preceded by 'a', 'i' or 'o'
+      'bex'  (<-'bic')
+      'dex'  (<-'dic')
+      'pex'  (<-'pic')
+      'tex'  (<-'tic')
+      'ax'   (<-'ac')
+      'ex'   (<-'ec')
+      'ix'   (<-'ic')
+      'lux'  (<-'luc')
+      'uad'  (<-'uas')
+      'vad'  (<-'vas')
+      'cid'  (<-'cis')
+      'lid'  (<-'lis')
+      'erid' (<-'eris')
+      'pand' (<-'pans')
+      'end'  (not 's' <-'ens')  // only when not preceded by 's'
+      'ond'  (<-'ons')
+      'lud'  (<-'lus')
+      'rud'  (<-'rus')
+      'her'  (not 'p' not 't' <-'hes')  // only when not preceded by 'p' or 't'
+      'mit'  (<-'mis')
+      'ent'  (not 'm' <-'ens')  // only when not preceded by 'm'
+        /* 'ent' was 'end' in the 1968 paper - a typo. */
+      'ert'  (<-'ers')
+      'et'   (not 'n' <-'es')  // only when not preceded by 'n'
+      'yt'   (<-'ys')
+      'yz'   (<-'ys')
+    )
+  )
+)
+
+define stem as (  // external entry point; all three stages work backwards from the end of the word
+
+  backwards (
+    do endings  // each stage runs under 'do', so a failing stage does not stop the next
+    do undouble
+    do respell
+  )
+)
+
diff --git a/contrib/snowball/algorithms/norwegian/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/norwegian/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..94a071653
--- /dev/null
+++ b/contrib/snowball/algorithms/norwegian/stem_ISO_8859_1.sbl
@@ -0,0 +1,80 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef ae   hex 'E6'
+stringdef ao   hex 'E5'
+stringdef o/   hex 'F8'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'bcdfghjlmnoprtvyz'
+
+define mark_regions as (  // compute mark p1 with a forward scan
+
+    $p1 = limit
+
+    test ( hop 3 setmark x )  // x = position after the third character; fails if the word is shorter
+    goto v  gopast non-v  setmark p1  // p1: just past the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // p1 is never left before x
+)
+
+backwardmode (
+
+    define main_suffix as (  // main suffix removal; the match may not reach back past p1
+        setlimit tomark p1 for ([substring])  // find the longest listed suffix with the limit set at p1
+        among(
+
+            'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
+            'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
+            'hetens' 'ers' 'ets' 'et' 'het' 'ast'
+                (delete)
+            's'
+                (s_ending or ('k' non-v) delete)  // 's' goes only after an s_ending letter, or after 'k' preceded by a non-vowel
+            'erte' 'ert'
+                (<-'er')
+        )
+    )
+
+    define consonant_pair as (  // for a final 'dt' or 'vt', remove the last letter
+        test (  // 'test' restores the cursor to the end of the word after the check
+            setlimit tomark p1 for ([substring])
+            among(
+                'dt' 'vt'
+            )
+        )
+        next] delete  // step back one character, close the slice there, delete it
+    )
+
+    define other_suffix as (  // remove one further suffix; the match may not reach back past p1
+        setlimit tomark p1 for ([substring])
+        among(
+            'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
+            'hetslov'
+                (delete)  // single shared action for every listed suffix
+        )
+    )
+
+define stem as (  // external entry point
+
+    do mark_regions
+    backwards (  // the three suffix stages run in this order, each under 'do'
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/contrib/snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..f57483354
--- /dev/null
+++ b/contrib/snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,80 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef ae   hex '91'
+stringdef ao   hex '86'
+stringdef o/   hex '9B'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'bcdfghjlmnoprtvyz'
+
+define mark_regions as (  // compute mark p1 with a forward scan
+
+    $p1 = limit
+
+    test ( hop 3 setmark x )  // x = position after the third character; fails if the word is shorter
+    goto v  gopast non-v  setmark p1  // p1: just past the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // p1 is never left before x
+)
+
+backwardmode (
+
+    define main_suffix as (  // main suffix removal; the match may not reach back past p1
+        setlimit tomark p1 for ([substring])  // find the longest listed suffix with the limit set at p1
+        among(
+
+            'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
+            'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
+            'hetens' 'ers' 'ets' 'et' 'het' 'ast'
+                (delete)
+            's'
+                (s_ending or ('k' non-v) delete)  // 's' goes only after an s_ending letter, or after 'k' preceded by a non-vowel
+            'erte' 'ert'
+                (<-'er')
+        )
+    )
+
+    define consonant_pair as (  // for a final 'dt' or 'vt', remove the last letter
+        test (  // 'test' restores the cursor to the end of the word after the check
+            setlimit tomark p1 for ([substring])
+            among(
+                'dt' 'vt'
+            )
+        )
+        next] delete  // step back one character, close the slice there, delete it
+    )
+
+    define other_suffix as (  // remove one further suffix; the match may not reach back past p1
+        setlimit tomark p1 for ([substring])
+        among(
+            'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
+            'hetslov'
+                (delete)  // single shared action for every listed suffix
+        )
+    )
+
+define stem as (  // external entry point
+
+    do mark_regions
+    backwards (  // the three suffix stages run in this order, each under 'do'
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/contrib/snowball/algorithms/porter/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/porter/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..9533b7932
--- /dev/null
+++ b/contrib/snowball/algorithms/porter/stem_ISO_8859_1.sbl
@@ -0,0 +1,139 @@
+integers ( p1 p2 )
+booleans ( Y_found )
+
+routines (
+   shortv
+   R1 R2
+   Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5a Step_5b
+)
+
+externals ( stem )
+
+groupings ( v v_WXY )
+
+define v        'aeiouy'
+define v_WXY    v + 'wxY'
+
+backwardmode (
+
+    define shortv as ( non-v_WXY v non-v )  // backwards: a letter outside v_WXY, preceded by a vowel, preceded by a non-vowel
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define Step_1a as (  // endings 'sses', 'ies', 'ss', 's'
+        [substring] among (
+            'sses' (<-'ss')
+            'ies'  (<-'i')
+            'ss'   ()  // matched but left unchanged, so the final 's' rule cannot fire on it
+            's'    (delete)
+        )
+    )
+
+    define Step_1b as (  // endings 'eed', 'ed', 'ing'
+        [substring] among (
+            'eed'  (R1 <-'ee')
+            'ed'
+            'ing' (  // shared by 'ed' and 'ing'
+                test gopast v  delete  // delete only if a vowel occurs before the ending
+                test substring among(  // inspect the new ending; 'test' keeps the cursor in place
+                    'at' 'bl' 'iz'
+                         (<+ 'e')  // insert an 'e'
+                    'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
+                    // ignoring double c, h, j, k, q, v, w, and x
+                         ([next]  delete)  // remove one letter of the double
+                    ''   (atmark p1  test shortv  <+ 'e')  // otherwise: cursor at p1 and shortv holds -> insert 'e'
+                )
+            )
+        )
+    )
+
+    define Step_1c as (  // final 'y' or 'Y' becomes 'i'
+        ['y' or 'Y']
+        gopast v  // requires a vowel somewhere before the 'y'
+        <-'i'
+    )
+
+    define Step_2 as (  // suffix rewrites applied when the matched suffix passes R1
+        [substring] R1 among (  // R1 is tested once, after the match and before any action
+            'tional'  (<-'tion')
+            'enci'    (<-'ence')
+            'anci'    (<-'ance')
+            'abli'    (<-'able')
+            'entli'   (<-'ent')
+            'eli'     (<-'e')
+            'izer' 'ization'
+                      (<-'ize')
+            'ational' 'ation' 'ator'
+                      (<-'ate')
+            'alli'    (<-'al')
+            'alism' 'aliti'
+                      (<-'al')
+            'fulness' (<-'ful')
+            'ousli' 'ousness'
+                      (<-'ous')
+            'iveness' 'iviti'
+                      (<-'ive')
+            'biliti'  (<-'ble')
+        )
+    )
+
+    define Step_3 as (  // further suffix rewrites, again subject to R1
+        [substring] R1 among (  // R1 is tested once, after the match and before any action
+            'alize'   (<-'al')
+            'icate' 'iciti' 'ical'
+                      (<-'ic')
+            'ative' 'ful' 'ness'
+                      (delete)
+        )
+    )
+
+    define Step_4 as (  // suffix deletions applied when the matched suffix passes R2
+        [substring] R2 among (  // R2 is tested once, after the match and before any action
+            'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
+            'ment' 'ent' 'ou' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
+                      (delete)
+            'ion'     ('s' or 't' delete)  // 'ion' goes only when preceded by 's' or 't'
+        )
+    )
+
+    define Step_5a as (  // remove a final 'e'
+        ['e']
+        R2 or (R1 not shortv)  // allowed in R2, or in R1 when shortv does not hold before the 'e'
+        delete
+    )
+
+    define Step_5b as (  // reduce a final 'll' to 'l'
+        ['l']
+        R2 'l'  // the sliced 'l' must pass R2 and be preceded by another 'l'
+        delete
+    )
+)
+
+define stem as (  // external entry point
+
+    unset Y_found
+    do ( ['y'] <-'Y' set Y_found)  // initial 'y' becomes 'Y'
+    do repeat(goto (v ['y']) <-'Y' set Y_found)  // 'y' after a vowel becomes 'Y'
+
+    $p1 = limit
+    $p2 = limit
+    do(
+        gopast v  gopast non-v  setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v  gopast non-v  setmark p2  // p2: the same rule applied again, starting from p1
+    )
+
+    backwards (  // every step runs under 'do', so a failing step does not stop the rest
+        do Step_1a
+        do Step_1b
+        do Step_1c
+        do Step_2
+        do Step_3
+        do Step_4
+        do Step_5a
+        do Step_5b
+    )
+
+    do(Y_found  repeat(goto (['Y']) <-'y'))  // turn every 'Y' back into 'y', only if one was created
+
+)
diff --git a/contrib/snowball/algorithms/portuguese/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/portuguese/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..3e7da08d4
--- /dev/null
+++ b/contrib/snowball/algorithms/portuguese/stem_ISO_8859_1.sbl
@@ -0,0 +1,218 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           verb_suffix
+           residual_suffix
+           residual_form
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a'   hex 'E1'  // a-acute
+stringdef a^   hex 'E2'  // a-circumflex e.g. 'bota^nico
+stringdef e'   hex 'E9'  // e-acute
+stringdef e^   hex 'EA'  // e-circumflex
+stringdef i'   hex 'ED'  // i-acute
+stringdef o^   hex 'F4'  // o-circumflex
+stringdef o'   hex 'F3'  // o-acute
+stringdef u'   hex 'FA'  // u-acute
+stringdef c,   hex 'E7'  // c-cedilla
+
+stringdef a~   hex 'E3'  // a-tilde
+stringdef o~   hex 'F5'  // o-tilde
+
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
+
+define prelude as repeat (  // rewrite the tilde letters as two-character sequences
+    [substring] among(
+        '{a~}' (<- 'a~')  // a-tilde becomes 'a' followed by '~'
+        '{o~}' (<- 'o~')  // o-tilde becomes 'o' followed by '~'
+        ''     (next)  // any other character: step over it
+    ) //or next
+)
+
+define mark_regions as (  // compute the marks pV, p1 and p2 with a forward scan
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (  // pV depends on the vowel/non-vowel pattern of the first two letters
+        ( v (non-v gopast v) or (v gopast non-v) )  // word begins with a vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // word begins with a non-vowel
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same rule applied again, starting from p1
+    )
+)
+
+define postlude as repeat (  // reverse of prelude: restore the single tilde letters
+    [substring] among(
+        'a~' (<- '{a~}')
+        'o~' (<- '{o~}')
+        ''   (next)  // any other character: step over it
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define standard_suffix as (  // standard suffix removal; the longest matching suffix selects the action
+        [substring] among(
+
+            'eza' 'ezas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            '{a'}vel'
+            '{i'}vel'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amento' 'amentos'
+            'imento' 'imentos'
+
+           'adora' 'ador' 'a{c,}a~o'
+           'adoras' 'adores' 'a{c,}o~es'  // no -ic test
+           'ante' 'antes' '{a^}ncia' // Note 1
+            (
+                R2 delete  // delete when the R2 test passes
+            )
+            'log{i'}a'
+            'log{i'}as'
+            (
+                R2 <- 'log'
+            )
+            'uci{o'}n' 'uciones'  // NOTE(review): these look like Spanish endings, not Portuguese - confirm against the reference algorithm
+            (
+                R2 <- 'u'
+            )
+            '{e^}ncia' '{e^}ncias'
+            (
+                R2 <- 'ente'
+            )
+            'amente'
+            (
+                R1 delete  // this suffix is tested against R1
+                try (
+                    [substring] R2 delete among(  // a further listed ending is deleted if R2 passes
+                        'iv' (['at'] R2 delete)  // after 'iv', also a preceding 'at'
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'ante' // Note 1
+                        'avel'
+                        '{i'}vel' (R2 delete)
+                    )
+                )
+            )
+            'idade'
+            'idades'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+            'ira' 'iras'
+            (
+                RV 'e'  // -eira -eiras usually non-verbal
+                <- 'ir'  // the slice ('ira'/'iras') is replaced; the preceding 'e' is only tested
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // verb endings; the backward search is limited at mark pV
+        [substring] among(  // the longest listed ending found becomes the slice
+            'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
+            'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
+            'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
+            'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
+            'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
+            'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
+            'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
+            'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
+            'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
+            'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
+            '{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
+            '{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
+            '{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
+            'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
+            'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
+            '{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
+
+            'ira' 'iras'
+                (delete)  // single shared action: remove the matched ending
+        )
+    )
+
+    define residual_suffix as (  // fallback used by stem when neither standard_suffix nor verb_suffix succeeds
+        [substring] among(
+            'os'
+            'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )  // delete when the RV test passes
+        )
+    )
+
+    define residual_form as (  // last step: final 'e' forms and c-cedilla
+        [substring] among(
+            'e' '{e'}' '{e^}'
+                ( RV delete [('u'] test 'g') or  // after deleting the 'e': a 'u' preceded by 'g' ...
+                             ('i'] test 'c') RV delete )  // ... or an 'i' preceded by 'c' is deleted too, if RV passes
+            '{c,}' (<-'c')  // final c-cedilla becomes plain 'c'
+        )
+    )
+)
+
+define stem as (  // external entry point
+    do prelude
+    do mark_regions
+    backwards (
+        do (
+            ( ( standard_suffix or verb_suffix )  // verb_suffix is tried only when standard_suffix fails
+              and do ( ['i'] test 'c' RV delete )  // after either succeeds: drop a final 'i' preceded by 'c', if RV passes
+            )
+            or residual_suffix  // reached only when both suffix routines fail
+        )
+        do residual_form
+    )
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/contrib/snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..4d6c85214
--- /dev/null
+++ b/contrib/snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,218 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           verb_suffix
+           residual_suffix
+           residual_form
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a'   hex 'A0'  // a-acute
+stringdef a^   hex '83'  // a-circumflex e.g. 'bota^nico
+stringdef e'   hex '82'  // e-acute
+stringdef e^   hex '88'  // e-circumflex
+stringdef i'   hex 'A1'  // i-acute
+stringdef o^   hex '93'  // o-circumflex
+stringdef o'   hex 'A2'  // o-acute
+stringdef u'   hex 'A3'  // u-acute
+stringdef c,   hex '87'  // c-cedilla
+
+stringdef a~   hex 'C6'  // a-tilde
+stringdef o~   hex 'E4'  // o-tilde
+
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
+
+define prelude as repeat (  // forward pass: rewrite the tilde vowels as two-character markers
+    [substring] among(
+        '{a~}' (<- 'a~')  // a-tilde becomes the two characters 'a' '~'
+        '{o~}' (<- 'o~')  // o-tilde becomes the two characters 'o' '~'
+        ''     (next)  // anything else: step over one character
+    ) //or next
+)
+
+define mark_regions as (  // computes the marks pV, p1 and p2 used by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (  // locate pV; if this fails pV keeps its default
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: then non-vowel -> past the next vowel; then vowel -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: then non-vowel -> past the next vowel; then vowel -> one character further
+        setmark pV
+    )
+    do (  // a failure part-way leaves the remaining mark(s) at the default
+        gopast v gopast non-v setmark p1  // p1: past the first vowel and the first non-vowel after it
+        gopast v gopast non-v setmark p2  // p2: the same search repeated starting from p1
+    )
+)
+
+define postlude as repeat (  // forward pass reversing prelude: restore the tilde vowels
+    [substring] among(
+        'a~' (<- '{a~}')  // the two characters 'a' '~' become a-tilde again
+        'o~' (<- '{o~}')  // the two characters 'o' '~' become o-tilde again
+        ''   (next)  // anything else: step over one character
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when mark pV is at or before the cursor
+    define R1 as $p1 <= cursor  // same test against mark p1
+    define R2 as $p2 <= cursor  // same test against mark p2
+
+    define standard_suffix as (  // backward-mode step: handle one derivational suffix; each action is gated by R1, R2 or RV
+        [substring] among(  // slice the longest listed suffix found at the cursor
+
+            'eza' 'ezas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            '{a'}vel'
+            '{i'}vel'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amento' 'amentos'
+            'imento' 'imentos'
+
+           'adora' 'ador' 'a{c,}a~o'
+           'adoras' 'adores' 'a{c,}o~es'  // no -ic test
+           'ante' 'antes' '{a^}ncia' // Note 1
+            (
+                R2 delete
+            )
+            'logia'   // Portuguese spelling; this entry used to be the accented Spanish form
+            'logias'  // Portuguese spelling; this entry used to be the accented Spanish form
+            (
+                R2 <- 'log'
+            )
+            'u{c,}a~o' 'u{c,}o~es'  // Portuguese endings replacing the former Spanish ones; written with a~ / o~ because prelude has rewritten the tilde vowels
+            (
+                R2 <- 'u'
+            )
+            '{e^}ncia' '{e^}ncias'
+            (
+                R2 <- 'ente'
+            )
+            'amente'
+            (
+                R1 delete
+                try (  // optional follow-up: remove one more suffix standing before 'amente'
+                    [substring] R2 delete among(
+                        'iv' (['at'] R2 delete)  // after 'iv', a preceding 'at' is removed too when R2 succeeds
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (  // optional follow-up on the suffix standing before 'mente'
+                    [substring] among(
+                        'ante' // Note 1
+                        'avel'
+                        '{i'}vel' (R2 delete)
+                    )
+                )
+            )
+            'idade'
+            'idades'
+            (
+                R2 delete
+                try (  // optional follow-up on the suffix standing before 'idade(s)'
+                    [substring] among(
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+            'ira' 'iras'
+            (
+                RV 'e'  // -eira -eiras usually non-verbal
+                <- 'ir'
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // backward-mode step; matching may not reach to the left of mark pV
+        [substring] among(  // slice the longest listed verb ending found at the cursor
+            'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
+            'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
+            'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
+            'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
+            'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
+            'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
+            'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
+            'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
+            'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
+            'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
+            '{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
+            '{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
+            '{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
+            'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
+            'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
+            '{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
+
+            'ira' 'iras'
+                (delete)  // every ending in the list shares this action: plain removal
+        )
+    )
+
+    define residual_suffix as (  // backward-mode step: strip one short residual ending
+        [substring] among(  // slice the longest listed ending found at the cursor
+            'os'
+            'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )  // the ending is removed only if RV succeeds
+        )
+    )
+
+    define residual_form as (  // backward-mode step: tidy a final e-type vowel or c-cedilla
+        [substring] among(
+            'e' '{e'}' '{e^}'
+                ( RV delete [('u'] test 'g') or  // if RV succeeds drop the ending; then slice a 'u' that follows 'g', or
+                             ('i'] test 'c') RV delete )  // an 'i' that follows 'c', and delete that letter too if RV succeeds
+            '{c,}' (<-'c')  // c-cedilla is replaced by plain 'c'
+        )
+    )
+)
+
+define stem as (  // the routine declared in externals: sequences every step of the algorithm
+    do prelude
+    do mark_regions
+    backwards (  // the suffix steps below run in backward mode
+        do (
+            ( ( standard_suffix or verb_suffix )  // the first of the two that succeeds
+              and do ( ['i'] test 'c' RV delete )  // if one did: drop a final 'i' that follows 'c', when RV succeeds
+            )
+            or residual_suffix  // tried only when neither routine above succeeded
+        )
+        do residual_form  // attempted whatever happened above
+    )
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/contrib/snowball/algorithms/romanian/stem_ISO_8859_2.sbl b/contrib/snowball/algorithms/romanian/stem_ISO_8859_2.sbl
new file mode 100644
index 000000000..48a148321
--- /dev/null
+++ b/contrib/snowball/algorithms/romanian/stem_ISO_8859_2.sbl
@@ -0,0 +1,236 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           step_0
+           standard_suffix combo_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+booleans  ( standard_suffix_removed )
+
+stringescapes {}
+
+/* special characters */
+
+stringdef a^   hex 'E2'  // a circumflex
+stringdef i^   hex 'EE'  // i circumflex
+stringdef a+   hex 'E3'  // a breve
+stringdef s,   hex 'BA'  // s cedilla
+stringdef t,   hex 'FE'  // t cedilla
+
+define v 'aeiou{a^}{i^}{a+}'
+
+define prelude as (  // forward pass: upper-cases every 'u' or 'i' that stands between two vowels
+    repeat goto (
+        v [ ('u' ] v <- 'U') or  // vowel, 'u', vowel: the 'u' becomes 'U'
+            ('i' ] v <- 'I')  // vowel, 'i', vowel: the 'i' becomes 'I'
+    )
+)
+
+define mark_regions as (  // computes the marks pV, p1 and p2 used by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+
+    do (  // locate pV; if this fails pV keeps its default
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: then non-vowel -> past the next vowel; then vowel -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: then non-vowel -> past the next vowel; then vowel -> one character further
+        setmark pV
+    )
+    do (  // a failure part-way leaves the remaining mark(s) at the default
+        gopast v gopast non-v setmark p1  // p1: past the first vowel and the first non-vowel after it
+        gopast v gopast non-v setmark p2  // p2: the same search repeated starting from p1
+    )
+)
+
+define postlude as repeat (  // forward pass: turns every 'I' and 'U' back into lower case
+
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)  // any other character is stepped over
+    )
+
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when mark pV is at or before the cursor
+    define R1 as $p1 <= cursor  // same test against mark p1
+    define R2 as $p2 <= cursor  // same test against mark p2
+
+    define step_0 as (  // backward-mode step: rewrites one of the listed endings, provided R1 succeeds
+        [substring] R1 among(
+            'ul' 'ului'
+                ( delete )
+            'aua'
+                ( <-'a' )
+            'ea' 'ele' 'elor'
+                ( <-'e' )
+            'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
+                ( <-'i')
+            'ile'
+                ( not 'ab' <- 'i' )  // replaced by 'i' unless 'ab' stands before the ending
+            'atei'
+                ( <- 'at' )
+            'a{t,}ie' 'a{t,}ia'
+                ( <- 'a{t,}i' )
+        )
+    )
+
+    define combo_suffix as test (  // shortens one compound suffix when R1 succeeds; 'test' restores the cursor afterwards
+        [substring] R1 (
+            among(
+            /* 'IST'. alternative: include the following
+                'alism' 'alisme'
+                'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
+                    <- 'al'
+                )
+            */
+                'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
+                    <- 'abil'
+                )
+                'ibilitate' (
+                    <- 'ibil'
+                )
+                'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
+                    <- 'iv'
+                )
+                'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
+                'icator' 'icatori'
+                'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
+                'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
+                    <- 'ic'
+                )
+                'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
+                'atoare' 'ator' 'atori'
+                '{a+}toare' '{a+}tor' '{a+}tori' (
+                    <- 'at'
+                )
+                'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
+                'itoare' 'itor' 'itori' (
+                    <- 'it'
+                )
+            )
+            set standard_suffix_removed  // record that a replacement was made
+        )
+    )
+
+    define standard_suffix as (  // backward-mode step: shorten compound suffixes, then handle one standard suffix when R2 succeeds
+        unset standard_suffix_removed
+        repeat combo_suffix  // keep shortening until combo_suffix fails
+        [substring] R2 (
+            among(
+
+                // past participle is treated here, rather than
+                // as a verb ending:
+                'at' 'ata' 'at{a+}' 'ati' 'ate'
+                'ut' 'uta' 'ut{a+}' 'uti' 'ute'
+                'it' 'ita' 'it{a+}' 'iti' 'ite'
+
+                'ic' 'ica' 'ice' 'ici' 'ic{a+}'
+                'abil' 'abila' 'abile' 'abili' 'abil{a+}'
+                'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
+                'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
+                'ant' 'anta' 'ante' 'anti' 'ant{a+}'
+                'ator' 'atori'
+                'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
+                'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
+                    delete
+                )
+                'iune' 'iuni' (
+                    '{t,}'] <- 't'  // only if {t,} precedes: {t,} plus the ending are replaced by 't'
+                )
+                'ism' 'isme'
+                'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
+                    <- 'ist'
+                    /* 'IST'. alternative: remove with <- '' */
+                )
+            )
+            set standard_suffix_removed  // reached only when the chosen action succeeded
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // backward-mode step; matching may not reach to the left of mark pV
+        [substring] among(  // slice the longest listed verb ending found at the cursor
+            // 'long' infinitive:
+            'are' 'ere' 'ire' '{a^}re'
+
+            // gerund:
+            'ind' '{a^}nd'
+            'indu' '{a^}ndu'
+
+            'eze'
+            'easc{a+}'
+            // present:
+            'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
+            'e{s,}te'
+            '{a+}sc' '{a+}{s,}ti'
+            '{a+}{s,}te'
+
+            // imperfect:
+            'am' 'ai' 'au'
+            'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
+            'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
+
+            // past: // (not 'ii')
+            'ui'
+            'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
+            'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
+            'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
+            '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
+
+            // pluperfect:
+            'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
+            'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
+            '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
+            '{a^}ser{a+}'
+            'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
+
+                ( non-v or 'u'  delete )  // first group: deleted only when a non-vowel or 'u' precedes the ending
+
+            // present:
+            '{a+}m' 'a{t,}i'
+            'em' 'e{t,}i'
+            'im' 'i{t,}i'
+            '{a^}m' '{a^}{t,}i'
+
+            // past:
+            'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
+            'sei' 'se'
+
+            // pluperfect:
+            'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
+                (delete)  // second group: deleted unconditionally
+        )
+    )
+
+    define vowel_suffix as (  // backward-mode step: removes one final vowel ending when RV succeeds
+        [substring] RV among (
+            'a' 'e' 'i' 'ie' '{a+}' ( delete )
+        )
+    )
+)
+
+define stem as (  // the routine declared in externals: sequences every step of the algorithm
+    do prelude
+    do mark_regions
+    backwards (  // the suffix steps below run in backward mode
+        do step_0
+        do standard_suffix
+        do ( standard_suffix_removed or verb_suffix )  // verb_suffix runs only if the flag was not set by the steps above
+        do vowel_suffix
+    )
+    do postlude
+)
+
diff --git a/contrib/snowball/algorithms/romanian/stem_Unicode.sbl b/contrib/snowball/algorithms/romanian/stem_Unicode.sbl
new file mode 100644
index 000000000..09aec6429
--- /dev/null
+++ b/contrib/snowball/algorithms/romanian/stem_Unicode.sbl
@@ -0,0 +1,236 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           step_0
+           standard_suffix combo_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+booleans  ( standard_suffix_removed )
+
+stringescapes {}
+
+/* special characters */
+
+stringdef a^   hex '0E2'  // a circumflex
+stringdef i^   hex '0EE'  // i circumflex
+stringdef a+   hex '103'  // a breve
+stringdef s,   hex '15F'  // s cedilla
+stringdef t,   hex '163'  // t cedilla
+
+define v 'aeiou{a^}{i^}{a+}'
+
+define prelude as (  // forward pass: upper-cases every 'u' or 'i' that stands between two vowels
+    repeat goto (
+        v [ ('u' ] v <- 'U') or  // vowel, 'u', vowel: the 'u' becomes 'U'
+            ('i' ] v <- 'I')  // vowel, 'i', vowel: the 'i' becomes 'I'
+    )
+)
+
+define mark_regions as (  // computes the marks pV, p1 and p2 used by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+
+    do (  // locate pV; if this fails pV keeps its default
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: then non-vowel -> past the next vowel; then vowel -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: then non-vowel -> past the next vowel; then vowel -> one character further
+        setmark pV
+    )
+    do (  // a failure part-way leaves the remaining mark(s) at the default
+        gopast v gopast non-v setmark p1  // p1: past the first vowel and the first non-vowel after it
+        gopast v gopast non-v setmark p2  // p2: the same search repeated starting from p1
+    )
+)
+
+define postlude as repeat (  // forward pass: turns every 'I' and 'U' back into lower case
+
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)  // any other character is stepped over
+    )
+
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when mark pV is at or before the cursor
+    define R1 as $p1 <= cursor  // same test against mark p1
+    define R2 as $p2 <= cursor  // same test against mark p2
+
+    define step_0 as (  // backward-mode step: rewrites one of the listed endings, provided R1 succeeds
+        [substring] R1 among(
+            'ul' 'ului'
+                ( delete )
+            'aua'
+                ( <-'a' )
+            'ea' 'ele' 'elor'
+                ( <-'e' )
+            'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
+                ( <-'i')
+            'ile'
+                ( not 'ab' <- 'i' )  // replaced by 'i' unless 'ab' stands before the ending
+            'atei'
+                ( <- 'at' )
+            'a{t,}ie' 'a{t,}ia'
+                ( <- 'a{t,}i' )
+        )
+    )
+
+    define combo_suffix as test (  // shortens one compound suffix when R1 succeeds; 'test' restores the cursor afterwards
+        [substring] R1 (
+            among(
+            /* 'IST'. alternative: include the following
+                'alism' 'alisme'
+                'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
+                    <- 'al'
+                )
+            */
+                'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
+                    <- 'abil'
+                )
+                'ibilitate' (
+                    <- 'ibil'
+                )
+                'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
+                    <- 'iv'
+                )
+                'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
+                'icator' 'icatori'
+                'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
+                'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
+                    <- 'ic'
+                )
+                'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
+                'atoare' 'ator' 'atori'
+                '{a+}toare' '{a+}tor' '{a+}tori' (
+                    <- 'at'
+                )
+                'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
+                'itoare' 'itor' 'itori' (
+                    <- 'it'
+                )
+            )
+            set standard_suffix_removed  // record that a replacement was made
+        )
+    )
+
+    define standard_suffix as (  // backward-mode step: shorten compound suffixes, then handle one standard suffix when R2 succeeds
+        unset standard_suffix_removed
+        repeat combo_suffix  // keep shortening until combo_suffix fails
+        [substring] R2 (
+            among(
+
+                // past participle is treated here, rather than
+                // as a verb ending:
+                'at' 'ata' 'at{a+}' 'ati' 'ate'
+                'ut' 'uta' 'ut{a+}' 'uti' 'ute'
+                'it' 'ita' 'it{a+}' 'iti' 'ite'
+
+                'ic' 'ica' 'ice' 'ici' 'ic{a+}'
+                'abil' 'abila' 'abile' 'abili' 'abil{a+}'
+                'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
+                'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
+                'ant' 'anta' 'ante' 'anti' 'ant{a+}'
+                'ator' 'atori'
+                'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
+                'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
+                    delete
+                )
+                'iune' 'iuni' (
+                    '{t,}'] <- 't'  // only if {t,} precedes: {t,} plus the ending are replaced by 't'
+                )
+                'ism' 'isme'
+                'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
+                    <- 'ist'
+                    /* 'IST'. alternative: remove with <- '' */
+                )
+            )
+            set standard_suffix_removed  // reached only when the chosen action succeeded
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // backward-mode step; matching may not reach to the left of mark pV
+        [substring] among(  // slice the longest listed verb ending found at the cursor
+            // 'long' infinitive:
+            'are' 'ere' 'ire' '{a^}re'
+
+            // gerund:
+            'ind' '{a^}nd'
+            'indu' '{a^}ndu'
+
+            'eze'
+            'easc{a+}'
+            // present:
+            'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
+            'e{s,}te'
+            '{a+}sc' '{a+}{s,}ti'
+            '{a+}{s,}te'
+
+            // imperfect:
+            'am' 'ai' 'au'
+            'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
+            'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
+
+            // past: // (not 'ii')
+            'ui'
+            'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
+            'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
+            'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
+            '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
+
+            // pluperfect:
+            'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
+            'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
+            '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
+            '{a^}ser{a+}'
+            'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
+
+                ( non-v or 'u'  delete )  // first group: deleted only when a non-vowel or 'u' precedes the ending
+
+            // present:
+            '{a+}m' 'a{t,}i'
+            'em' 'e{t,}i'
+            'im' 'i{t,}i'
+            '{a^}m' '{a^}{t,}i'
+
+            // past:
+            'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
+            'sei' 'se'
+
+            // pluperfect:
+            'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
+                (delete)  // second group: deleted unconditionally
+        )
+    )
+
+    define vowel_suffix as (  // backward-mode step: removes one final vowel ending when RV succeeds
+        [substring] RV among (
+            'a' 'e' 'i' 'ie' '{a+}' ( delete )
+        )
+    )
+)
+
+define stem as (  // the routine declared in externals: sequences every step of the algorithm
+    do prelude
+    do mark_regions
+    backwards (  // the suffix steps below run in backward mode
+        do step_0
+        do standard_suffix
+        do ( standard_suffix_removed or verb_suffix )  // verb_suffix runs only if the flag was not set by the steps above
+        do vowel_suffix
+    )
+    do postlude
+)
+
diff --git a/contrib/snowball/algorithms/russian/stem_KOI8_R.sbl b/contrib/snowball/algorithms/russian/stem_KOI8_R.sbl
new file mode 100644
index 000000000..cdacb19c4
--- /dev/null
+++ b/contrib/snowball/algorithms/russian/stem_KOI8_R.sbl
@@ -0,0 +1,217 @@
+stringescapes {}
+
+/* the 32 Cyrillic letters in the KOI8-R coding scheme, and represented
+   in Latin characters following the conventions of the standard Library
+   of Congress transliteration: */
+
+stringdef a    hex 'C1'
+stringdef b    hex 'C2'
+stringdef v    hex 'D7'
+stringdef g    hex 'C7'
+stringdef d    hex 'C4'
+stringdef e    hex 'C5'
+stringdef zh   hex 'D6'
+stringdef z    hex 'DA'
+stringdef i    hex 'C9'
+stringdef i`   hex 'CA'
+stringdef k    hex 'CB'
+stringdef l    hex 'CC'
+stringdef m    hex 'CD'
+stringdef n    hex 'CE'
+stringdef o    hex 'CF'
+stringdef p    hex 'D0'
+stringdef r    hex 'D2'
+stringdef s    hex 'D3'
+stringdef t    hex 'D4'
+stringdef u    hex 'D5'
+stringdef f    hex 'C6'
+stringdef kh   hex 'C8'
+stringdef ts   hex 'C3'
+stringdef ch   hex 'DE'
+stringdef sh   hex 'DB'
+stringdef shch hex 'DD'
+stringdef "    hex 'DF'
+stringdef y    hex 'D9'
+stringdef '    hex 'D8'
+stringdef e`   hex 'DC'
+stringdef iu   hex 'C0'
+stringdef ia   hex 'D1'
+
+routines ( mark_regions R2
+           perfective_gerund
+           adjective
+           adjectival
+           reflexive
+           verb
+           noun
+           derivational
+           tidy_up
+)
+
+externals ( stem )
+
+integers ( pV p2 )
+
+groupings ( v )
+
+define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
+
+define mark_regions as (  // computes the marks pV and p2
+
+    $pV = limit
+    $p2 = limit  // both marks default to the limit
+    do (  // if a search fails, any mark not yet set keeps its default
+        gopast v  setmark pV  gopast non-v  // pV: just past the first vowel; then move past the next non-vowel
+        gopast v  gopast non-v  setmark p2  // p2: past the following vowel and the non-vowel after it
+       )
+)
+
+backwardmode (
+
+    define R2 as $p2 <= cursor  // succeeds when mark p2 is at or before the cursor
+
+    define perfective_gerund as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{v}'
+            '{v}{sh}{i}'
+            '{v}{sh}{i}{s}{'}'
+                ('{a}' or '{ia}' delete)  // first group: deleted only when {a} or {ia} precedes the ending
+            '{i}{v}'
+            '{i}{v}{sh}{i}'
+            '{i}{v}{sh}{i}{s}{'}'
+            '{y}{v}'
+            '{y}{v}{sh}{i}'
+            '{y}{v}{sh}{i}{s}{'}'
+                (delete)  // second group: deleted unconditionally
+        )
+    )
+
+    define adjective as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
+            '{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
+            '{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
+            '{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
+            '{ia}{ia}'
+                        // and -
+            '{o}{iu}'   // - which is somewhat archaic
+            '{e}{iu}'   // - soft form of {o}{iu}
+                (delete)  // every ending in the list is deleted unconditionally
+        )
+    )
+
+    define adjectival as (  // backward-mode step: adjective ending, then optionally one participle ending
+        adjective  // must succeed, otherwise adjectival fails as a whole
+
+        /* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
+           nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
+           errors. Removing im, uem, enn creates too many errors.
+        */
+
+        try (  // optional: failure here does not undo the adjective removal
+            [substring] among (
+                '{e}{m}'                  // present passive participle
+                '{n}{n}'                  // adjective from past passive participle
+                '{v}{sh}'                 // past active participle
+                '{iu}{shch}' '{shch}'     // present active participle
+                    ('{a}' or '{ia}' delete)  // deleted only when {a} or {ia} precedes the ending
+
+     //but not  '{i}{m}' '{u}{e}{m}'      // present passive participle
+     //or       '{e}{n}{n}'               // adjective from past passive participle
+
+                '{i}{v}{sh}' '{y}{v}{sh}'// past active participle
+                '{u}{iu}{shch}'          // present active participle
+                    (delete)  // deleted unconditionally
+            )
+        )
+
+    )
+
+    define reflexive as (  // backward-mode step: removes one of the two listed endings
+        [substring] among (
+            '{s}{ia}'
+            '{s}{'}'
+                (delete)  // both endings are deleted unconditionally
+        )
+    )
+
+    define verb as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
+            '{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
+            '{n}{y}' '{t}{'}' '{e}{sh}{'}'
+
+            '{n}{n}{o}'
+                ('{a}' or '{ia}' delete)  // first group: deleted only when {a} or {ia} precedes the ending
+
+            '{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
+            '{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
+            '{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
+            '{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
+            '{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
+            '{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
+                (delete)  // second group: deleted unconditionally
+            /* note the short passive participle tests:
+               '{n}{a}' '{n}' '{n}{o}' '{n}{y}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
+            */
+        )
+    )
+
+    define noun as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
+            '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
+            '{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
+            '{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
+            '{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
+            '{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
+                (delete)  // every ending in the list is deleted unconditionally
+            /* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
+               omitted - they only occur on 12 words.
+            */
+        )
+    )
+
+    define derivational as (  // backward-mode step: removes one of the two listed endings when R2 succeeds
+        [substring] R2 among (
+            '{o}{s}{t}'
+            '{o}{s}{t}{'}'
+                (delete)
+        )
+    )
+
+    define tidy_up as (  // backward-mode step: final clean-up of the word ending
+        [substring] among (
+
+            '{e}{i`}{sh}'
+            '{e}{i`}{sh}{e}'  // superlative forms
+               (delete
+                ['{n}'] '{n}' delete  // then slice a final {n} and delete it if another {n} precedes it
+               )
+            '{n}'
+               ('{n}' delete) // e.g. -nno endings
+            '{'}'
+               (delete)  // with some slight false conflations
+        )
+    )
+)
+
+define stem as (  // the routine declared in externals: sequences every step of the algorithm
+
+    do mark_regions
+    backwards setlimit tomark pV for (  // work backwards, never reaching to the left of mark pV
+        do (
+             perfective_gerund or
+             ( try reflexive  // otherwise: optionally strip a reflexive ending first,
+               adjectival or verb or noun  // then apply the first of these three that succeeds
+             )
+        )
+        try([ '{i}' ] delete)
+        // because noun ending -i{iu} is being treated as verb ending -{iu}
+
+        do derivational
+        do tidy_up
+    )
+)
diff --git a/contrib/snowball/algorithms/russian/stem_Unicode.sbl b/contrib/snowball/algorithms/russian/stem_Unicode.sbl
new file mode 100644
index 000000000..9e1a93f93
--- /dev/null
+++ b/contrib/snowball/algorithms/russian/stem_Unicode.sbl
@@ -0,0 +1,215 @@
+stringescapes {}
+
+/* the 32 Cyrillic letters in Unicode */
+
+stringdef a    hex '430'
+stringdef b    hex '431'
+stringdef v    hex '432'
+stringdef g    hex '433'
+stringdef d    hex '434'
+stringdef e    hex '435'
+stringdef zh   hex '436'
+stringdef z    hex '437'
+stringdef i    hex '438'
+stringdef i`   hex '439'
+stringdef k    hex '43A'
+stringdef l    hex '43B'
+stringdef m    hex '43C'
+stringdef n    hex '43D'
+stringdef o    hex '43E'
+stringdef p    hex '43F'
+stringdef r    hex '440'
+stringdef s    hex '441'
+stringdef t    hex '442'
+stringdef u    hex '443'
+stringdef f    hex '444'
+stringdef kh   hex '445'
+stringdef ts   hex '446'
+stringdef ch   hex '447'
+stringdef sh   hex '448'
+stringdef shch hex '449'
+stringdef "    hex '44A'
+stringdef y    hex '44B'
+stringdef '    hex '44C'
+stringdef e`   hex '44D'
+stringdef iu   hex '44E'
+stringdef ia   hex '44F'
+
+routines ( mark_regions R2
+           perfective_gerund
+           adjective
+           adjectival
+           reflexive
+           verb
+           noun
+           derivational
+           tidy_up
+)
+
+externals ( stem )
+
+integers ( pV p2 )
+
+groupings ( v )
+
+define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
+
+define mark_regions as (  // computes the marks pV and p2
+
+    $pV = limit
+    $p2 = limit  // both marks default to the limit
+    do (  // if a search fails, any mark not yet set keeps its default
+        gopast v  setmark pV  gopast non-v  // pV: just past the first vowel; then move past the next non-vowel
+        gopast v  gopast non-v  setmark p2  // p2: past the following vowel and the non-vowel after it
+       )
+)
+
+backwardmode (
+
+    define R2 as $p2 <= cursor  // succeeds when mark p2 is at or before the cursor
+
+    define perfective_gerund as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{v}'
+            '{v}{sh}{i}'
+            '{v}{sh}{i}{s}{'}'
+                ('{a}' or '{ia}' delete)  // first group: deleted only when {a} or {ia} precedes the ending
+            '{i}{v}'
+            '{i}{v}{sh}{i}'
+            '{i}{v}{sh}{i}{s}{'}'
+            '{y}{v}'
+            '{y}{v}{sh}{i}'
+            '{y}{v}{sh}{i}{s}{'}'
+                (delete)  // second group: deleted unconditionally
+        )
+    )
+
+    define adjective as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
+            '{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
+            '{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
+            '{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
+            '{ia}{ia}'
+                        // and -
+            '{o}{iu}'   // - which is somewhat archaic
+            '{e}{iu}'   // - soft form of {o}{iu}
+                (delete)  // every ending in the list is deleted unconditionally
+        )
+    )
+
+    define adjectival as (  // backward-mode step: adjective ending, then optionally one participle ending
+        adjective  // must succeed, otherwise adjectival fails as a whole
+
+        /* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
+           nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
+           errors. Removing im, uem, enn creates too many errors.
+        */
+
+        try (  // optional: failure here does not undo the adjective removal
+            [substring] among (
+                '{e}{m}'                  // present passive participle
+                '{n}{n}'                  // adjective from past passive participle
+                '{v}{sh}'                 // past active participle
+                '{iu}{shch}' '{shch}'     // present active participle
+                    ('{a}' or '{ia}' delete)  // deleted only when {a} or {ia} precedes the ending
+
+     //but not  '{i}{m}' '{u}{e}{m}'      // present passive participle
+     //or       '{e}{n}{n}'               // adjective from past passive participle
+
+                '{i}{v}{sh}' '{y}{v}{sh}'// past active participle
+                '{u}{iu}{shch}'          // present active participle
+                    (delete)  // deleted unconditionally
+            )
+        )
+
+    )
+
+    define reflexive as (  // backward-mode step: removes one of the two listed endings
+        [substring] among (
+            '{s}{ia}'
+            '{s}{'}'
+                (delete)  // both endings are deleted unconditionally
+        )
+    )
+
+    define verb as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
+            '{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
+            '{n}{y}' '{t}{'}' '{e}{sh}{'}'
+
+            '{n}{n}{o}'
+                ('{a}' or '{ia}' delete)  // first group: deleted only when {a} or {ia} precedes the ending
+
+            '{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
+            '{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
+            '{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
+            '{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
+            '{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
+            '{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
+                (delete)  // second group: deleted unconditionally
+            /* note the short passive participle tests:
+               '{n}{a}' '{n}' '{n}{o}' '{n}{y}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
+            */
+        )
+    )
+
+    define noun as (  // backward-mode step: removes one of the listed endings
+        [substring] among (
+            '{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
+            '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
+            '{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
+            '{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
+            '{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
+            '{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
+                (delete)  // every ending in the list is deleted unconditionally
+            /* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
+               omitted - they only occur on 12 words.
+            */
+        )
+    )
+
+    define derivational as (  // backward-mode step: removes one of the two listed endings when R2 succeeds
+        [substring] R2 among (
+            '{o}{s}{t}'
+            '{o}{s}{t}{'}'
+                (delete)
+        )
+    )
+
+    define tidy_up as (  // backward-mode step: final clean-up of the word ending
+        [substring] among (
+
+            '{e}{i`}{sh}'
+            '{e}{i`}{sh}{e}'  // superlative forms
+               (delete
+                ['{n}'] '{n}' delete  // then slice a final {n} and delete it if another {n} precedes it
+               )
+            '{n}'
+               ('{n}' delete) // e.g. -nno endings
+            '{'}'
+               (delete)  // with some slight false conflations
+        )
+    )
+)
+
+define stem as (  // the routine declared in externals: sequences every step of the algorithm
+
+    do mark_regions
+    backwards setlimit tomark pV for (  // work backwards, never reaching to the left of mark pV
+        do (
+             perfective_gerund or
+             ( try reflexive  // otherwise: optionally strip a reflexive ending first,
+               adjectival or verb or noun  // then apply the first of these three that succeeds
+             )
+        )
+        try([ '{i}' ] delete)
+        // because noun ending -i{iu} is being treated as verb ending -{iu}
+
+        do derivational
+        do tidy_up
+    )
+)
diff --git a/contrib/snowball/algorithms/spanish/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/spanish/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..9dee289cc
--- /dev/null
+++ b/contrib/snowball/algorithms/spanish/stem_ISO_8859_1.sbl
@@ -0,0 +1,230 @@
+routines (
+           postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           y_verb_suffix
+           verb_suffix
+           residual_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )  // marks set by mark_regions and compared with cursor in RV / R1 / R2
+
+groupings ( v )
+
+stringescapes {}  // a stringdef name written as {name} inside a literal is expanded
+
+/* special characters (in ISO Latin I) */
+
+stringdef a'   hex 'E1'  // a-acute
+stringdef e'   hex 'E9'  // e-acute
+stringdef i'   hex 'ED'  // i-acute
+stringdef o'   hex 'F3'  // o-acute
+stringdef u'   hex 'FA'  // u-acute
+stringdef u"   hex 'FC'  // u-diaeresis
+stringdef n~   hex 'F1'  // n-tilde (not referenced by any routine in this file)
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'  // the vowel grouping: plain and accented vowels
+
+define mark_regions as (  // sets the marks pV, p1 and p2 that RV, R1 and R2 test against
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // first letter is a vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // first letter is not a vowel
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just after the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same rule applied again, starting at p1
+    )
+)
+
+define postlude as repeat (  // walk through the word, replacing acute-accented vowels by plain ones
+    [substring] among(
+        '{a'}' (<- 'a')
+        '{e'}' (<- 'e')
+        '{i'}' (<- 'i')
+        '{o'}' (<- 'o')
+        '{u'}' (<- 'u')
+        // and possibly {u"}->u here, or in prelude
+        ''     (next)  // anything else: step over one character (fails at the end, ending the repeat)
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as ( $pV <= cursor )  // succeeds when the cursor is at or beyond mark pV
+    define R1 as ( $p1 <= cursor )  // succeeds when the cursor is at or beyond mark p1
+    define R2 as ( $p2 <= cursor )  // succeeds when the cursor is at or beyond mark p2
+
+    define attached_pronoun as (  // remove a pronoun attached after one of the verb endings below
+        [substring] among(  // the slice [ ] is placed around the pronoun
+            'me' 'se'  'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
+            'las' 'les' 'los' 'nos'
+        )
+        substring RV among(  // longest verb ending before the pronoun; RV is tested at its start
+            'i{e'}ndo' (] <- 'iendo')  // ] widens the slice over the ending: pronoun and accent both go
+            '{a'}ndo'  (] <- 'ando')
+            '{a'}r'    (] <- 'ar')
+            '{e'}r'    (] <- 'er')
+            '{i'}r'    (] <- 'ir')
+            'ando'
+            'iendo'
+            'ar' 'er' 'ir'
+                       (delete)  // unaccented ending: only the pronoun is deleted
+            'yendo'    ('u' delete)  // the pronoun is deleted only if 'u' stands before 'yendo'
+        )
+    )
+
+    define standard_suffix as (  // remove or replace the longest suffix listed below; fails if its region test fails
+        [substring] among(
+
+            'anza' 'anzas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            'able' 'ables'
+            'ible' 'ibles'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amiento' 'amientos'
+            'imiento' 'imientos'
+            (
+                R2 delete  // delete when the suffix starts at or after mark p2
+            )
+            'adora' 'ador' 'aci{o'}n'
+            'adoras' 'adores' 'aciones'
+            'ante' 'antes' 'ancia' 'ancias'// Note 1
+            (
+                R2 delete
+                try ( ['ic'] R2 delete )  // and a preceding 'ic' as well, if that passes R2
+            )
+            'log{i'}a'
+            'log{i'}as'
+            (
+                R2 <- 'log'
+            )
+            'uci{o'}n' 'uciones'
+            (
+                R2 <- 'u'
+            )
+            'encia' 'encias'
+            (
+                R2 <- 'ente'
+            )
+            'amente'
+            (
+                R1 delete  // this entry is tested with R1, the others with R2
+                try (
+                    [substring] R2 delete among(  // a preceding iv / os / ic / ad passing R2 is deleted ...
+                        'iv' (['at'] R2 delete)  // ... and before 'iv' an 'at' passing R2 too
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (
+                    [substring] among(  // then one of these, if it passes R2
+                        'ante' // Note 1
+                        'able'
+                        'ible' (R2 delete)
+                    )
+                )
+            )
+            'idad'
+            'idades'
+            (
+                R2 delete
+                try (
+                    [substring] among(  // then one of these, if it passes R2
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+        )
+    )
+
+    define y_verb_suffix as (  // verb endings in y-, searched after mark pV, removed only after 'u'
+        setlimit tomark pV for ( [substring] )
+        among (
+            'ya' 'yas' 'yan' 'yais' 'yamos' 'ye' 'yes' 'yen'
+            'yo' 'y{o'}' 'yeron' 'yendo' ( 'u' delete )
+        )
+    )
+
+    define verb_suffix as (  // delete the longest verb ending found between mark pV and the end
+        setlimit tomark pV for ([substring]) among(
+
+            'en' 'es' '{e'}is' 'emos'
+                (try ('u' test 'g') ] delete)  // after 'gu' the 'u' is deleted along with the ending
+
+            'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
+            'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
+            'ar{e'}'
+            'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
+            'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
+            'er{e'}'
+            'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
+            'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
+            'ir{e'}'
+
+            'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
+            'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
+            'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
+            'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
+            'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
+            'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
+            'ierais'  'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
+            'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
+            '{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
+                (delete)  // every ending in the lists above is simply deleted
+        )
+    )
+
+    define residual_suffix as (  // last backward step: delete a remaining short ending if RV holds
+        [substring] among(
+            'os'
+            'a' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )
+            'e' '{e'}'
+                ( RV delete try( ['u'] test 'g' RV delete ) )  // then a 'u' standing after 'g', if RV holds for it
+        )
+    )
+)
+
+define stem as (
+    // 1. set the marks that the RV / R1 / R2 tests compare against
+    do mark_regions
+    backwards (
+        // 2. suffix steps, run from the end of the word; `do` makes each one optional
+        do attached_pronoun
+        do ( standard_suffix or y_verb_suffix or verb_suffix )
+        do residual_suffix
+    )
+    // 3. replace the accented vowels that remain
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/contrib/snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..db7a46201
--- /dev/null
+++ b/contrib/snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,230 @@
+routines (
+           postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           y_verb_suffix
+           verb_suffix
+           residual_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )  // marks set by mark_regions and compared with cursor in RV / R1 / R2
+
+groupings ( v )
+
+stringescapes {}  // a stringdef name written as {name} inside a literal is expanded
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a'   hex 'A0'  // a-acute
+stringdef e'   hex '82'  // e-acute
+stringdef i'   hex 'A1'  // i-acute
+stringdef o'   hex 'A2'  // o-acute
+stringdef u'   hex 'A3'  // u-acute
+stringdef u"   hex '81'  // u-diaeresis
+stringdef n~   hex 'A4'  // n-tilde (not referenced by any routine in this file)
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'  // the vowel grouping: plain and accented vowels
+
+define mark_regions as (  // sets the marks pV, p1 and p2 that RV, R1 and R2 test against
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // first letter is a vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // first letter is not a vowel
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just after the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same rule applied again, starting at p1
+    )
+)
+
+define postlude as repeat (  // walk through the word, replacing acute-accented vowels by plain ones
+    [substring] among(
+        '{a'}' (<- 'a')
+        '{e'}' (<- 'e')
+        '{i'}' (<- 'i')
+        '{o'}' (<- 'o')
+        '{u'}' (<- 'u')
+        // and possibly {u"}->u here, or in prelude
+        ''     (next)  // anything else: step over one character (fails at the end, ending the repeat)
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as ( $pV <= cursor )  // succeeds when the cursor is at or beyond mark pV
+    define R1 as ( $p1 <= cursor )  // succeeds when the cursor is at or beyond mark p1
+    define R2 as ( $p2 <= cursor )  // succeeds when the cursor is at or beyond mark p2
+
+    define attached_pronoun as (  // remove a pronoun attached after one of the verb endings below
+        [substring] among(  // the slice [ ] is placed around the pronoun
+            'me' 'se'  'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
+            'las' 'les' 'los' 'nos'
+        )
+        substring RV among(  // longest verb ending before the pronoun; RV is tested at its start
+            'i{e'}ndo' (] <- 'iendo')  // ] widens the slice over the ending: pronoun and accent both go
+            '{a'}ndo'  (] <- 'ando')
+            '{a'}r'    (] <- 'ar')
+            '{e'}r'    (] <- 'er')
+            '{i'}r'    (] <- 'ir')
+            'ando'
+            'iendo'
+            'ar' 'er' 'ir'
+                       (delete)  // unaccented ending: only the pronoun is deleted
+            'yendo'    ('u' delete)  // the pronoun is deleted only if 'u' stands before 'yendo'
+        )
+    )
+
+    define standard_suffix as (  // remove or replace the longest suffix listed below; fails if its region test fails
+        [substring] among(
+
+            'anza' 'anzas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            'able' 'ables'
+            'ible' 'ibles'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amiento' 'amientos'
+            'imiento' 'imientos'
+            (
+                R2 delete  // delete when the suffix starts at or after mark p2
+            )
+            'adora' 'ador' 'aci{o'}n'
+            'adoras' 'adores' 'aciones'
+            'ante' 'antes' 'ancia' 'ancias'// Note 1
+            (
+                R2 delete
+                try ( ['ic'] R2 delete )  // and a preceding 'ic' as well, if that passes R2
+            )
+            'log{i'}a'
+            'log{i'}as'
+            (
+                R2 <- 'log'
+            )
+            'uci{o'}n' 'uciones'
+            (
+                R2 <- 'u'
+            )
+            'encia' 'encias'
+            (
+                R2 <- 'ente'
+            )
+            'amente'
+            (
+                R1 delete  // this entry is tested with R1, the others with R2
+                try (
+                    [substring] R2 delete among(  // a preceding iv / os / ic / ad passing R2 is deleted ...
+                        'iv' (['at'] R2 delete)  // ... and before 'iv' an 'at' passing R2 too
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (
+                    [substring] among(  // then one of these, if it passes R2
+                        'ante' // Note 1
+                        'able'
+                        'ible' (R2 delete)
+                    )
+                )
+            )
+            'idad'
+            'idades'
+            (
+                R2 delete
+                try (
+                    [substring] among(  // then one of these, if it passes R2
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+        )
+    )
+
+    define y_verb_suffix as (  // verb endings in y-, searched after mark pV, removed only after 'u'
+        setlimit tomark pV for ( [substring] )
+        among (
+            'ya' 'yas' 'yan' 'yais' 'yamos' 'ye' 'yes' 'yen'
+            'yo' 'y{o'}' 'yeron' 'yendo' ( 'u' delete )
+        )
+    )
+
+    define verb_suffix as (  // delete the longest verb ending found between mark pV and the end
+        setlimit tomark pV for ([substring]) among(
+
+            'en' 'es' '{e'}is' 'emos'
+                (try ('u' test 'g') ] delete)  // after 'gu' the 'u' is deleted along with the ending
+
+            'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
+            'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
+            'ar{e'}'
+            'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
+            'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
+            'er{e'}'
+            'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
+            'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
+            'ir{e'}'
+
+            'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
+            'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
+            'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
+            'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
+            'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
+            'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
+            'ierais'  'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
+            'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
+            '{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
+                (delete)  // every ending in the lists above is simply deleted
+        )
+    )
+
+    define residual_suffix as (  // last backward step: delete a remaining short ending if RV holds
+        [substring] among(
+            'os'
+            'a' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )
+            'e' '{e'}'
+                ( RV delete try( ['u'] test 'g' RV delete ) )  // then a 'u' standing after 'g', if RV holds for it
+        )
+    )
+)
+
+define stem as (
+    // 1. set the marks that the RV / R1 / R2 tests compare against
+    do mark_regions
+    backwards (
+        // 2. suffix steps, run from the end of the word; `do` makes each one optional
+        do attached_pronoun
+        do ( standard_suffix or y_verb_suffix or verb_suffix )
+        do residual_suffix
+    )
+    // 3. replace the accented vowels that remain
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/contrib/snowball/algorithms/swedish/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/swedish/stem_ISO_8859_1.sbl
new file mode 100644
index 000000000..03ce1e22f
--- /dev/null
+++ b/contrib/snowball/algorithms/swedish/stem_ISO_8859_1.sbl
@@ -0,0 +1,72 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )  // p1: mark limiting the suffix search; x: helper mark set in mark_regions
+
+groupings ( v s_ending )
+
+stringescapes {}  // a stringdef name written as {name} inside a literal is expanded
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'  // a-diaeresis
+stringdef ao   hex 'E5'  // a-ring
+stringdef o"   hex 'F6'  // o-diaeresis
+
+define v 'aeiouy{a"}{ao}{o"}'  // the vowel grouping
+
+define s_ending  'bcdfghjklmnoprtvy'  // letters after which main_suffix deletes a final 's'
+
+define mark_regions as (  // sets mark p1, which limits where the suffix routines may search
+
+    $p1 = limit
+    test ( hop 3 setmark x )  // x = position after 3 characters; a shorter word fails here, leaving p1 = limit
+    goto v gopast non-v  setmark p1  // p1 = just after the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // ... but never before x
+)
+
+backwardmode (
+
+    define main_suffix as (
+        // longest ending found after mark p1; a bare 's' is deleted only after an s_ending letter
+        setlimit tomark p1 for ( [substring] )
+        among (
+            'a' 'ad' 'ade' 'ades' 'ande' 'anden' 'andes' 'andet' 'ar' 'are'
+            'aren' 'arens' 'arna' 'arnas' 'arne' 'as' 'ast' 'aste' 'at'
+            'e' 'en' 'ens' 'er' 'ern' 'erna' 'ernas' 'erns' 'es'
+            'het' 'heten' 'hetens' 'heter' 'heterna' 'or' 'orna' 'ornas'
+                ( delete )
+            's'
+                ( s_ending delete )
+        )
+    )
+
+    define consonant_pair as setlimit tomark p1 for (  // if one of these pairs ends the word (after p1), delete its last letter
+        among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
+        and ([next] delete)  // `and` restarts from the same cursor position, so [next] covers only the final letter
+    )
+
+    define other_suffix as setlimit tomark p1 for (  // a further group of endings, searched after mark p1
+        [substring] among(
+            'lig' 'ig' 'els' (delete)
+            'l{o"}st'        (<-'l{o"}s')  // only the final 't' is removed
+            'fullt'          (<-'full')  // only the final 't' is removed
+        )
+    )
+)
+
+define stem as (
+    // Entry point (listed in externals). Every step is wrapped in `do`,
+    // so the next step runs whether or not the previous one succeeded.
+    do mark_regions
+    backwards (
+        do main_suffix  do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/contrib/snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 000000000..1631f401a
--- /dev/null
+++ b/contrib/snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,72 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )  // p1: mark limiting the suffix search; x: helper mark set in mark_regions
+
+groupings ( v s_ending )
+
+stringescapes {}  // a stringdef name written as {name} inside a literal is expanded
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a"   hex '84'  // a-diaeresis
+stringdef ao   hex '86'  // a-ring
+stringdef o"   hex '94'  // o-diaeresis
+
+define v 'aeiouy{a"}{ao}{o"}'  // the vowel grouping
+
+define s_ending  'bcdfghjklmnoprtvy'  // letters after which main_suffix deletes a final 's'
+
+define mark_regions as (  // sets mark p1, which limits where the suffix routines may search
+
+    $p1 = limit
+    test ( hop 3 setmark x )  // x = position after 3 characters; a shorter word fails here, leaving p1 = limit
+    goto v gopast non-v  setmark p1  // p1 = just after the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // ... but never before x
+)
+
+backwardmode (
+
+    define main_suffix as (
+        // longest ending found after mark p1; a bare 's' is deleted only after an s_ending letter
+        setlimit tomark p1 for ( [substring] )
+        among (
+            'a' 'ad' 'ade' 'ades' 'ande' 'anden' 'andes' 'andet' 'ar' 'are'
+            'aren' 'arens' 'arna' 'arnas' 'arne' 'as' 'ast' 'aste' 'at'
+            'e' 'en' 'ens' 'er' 'ern' 'erna' 'ernas' 'erns' 'es'
+            'het' 'heten' 'hetens' 'heter' 'heterna' 'or' 'orna' 'ornas'
+                ( delete )
+            's'
+                ( s_ending delete )
+        )
+    )
+
+    define consonant_pair as setlimit tomark p1 for (  // if one of these pairs ends the word (after p1), delete its last letter
+        among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
+        and ([next] delete)  // `and` restarts from the same cursor position, so [next] covers only the final letter
+    )
+
+    define other_suffix as setlimit tomark p1 for (  // a further group of endings, searched after mark p1
+        [substring] among(
+            'lig' 'ig' 'els' (delete)
+            'l{o"}st'        (<-'l{o"}s')  // only the final 't' is removed
+            'fullt'          (<-'full')  // only the final 't' is removed
+        )
+    )
+)
+
+define stem as (
+    // Entry point (listed in externals). Every step is wrapped in `do`,
+    // so the next step runs whether or not the previous one succeeded.
+    do mark_regions
+    backwards (
+        do main_suffix  do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/contrib/snowball/algorithms/turkish/stem_Unicode.sbl b/contrib/snowball/algorithms/turkish/stem_Unicode.sbl
new file mode 100644
index 000000000..16c02a51f
--- /dev/null
+++ b/contrib/snowball/algorithms/turkish/stem_Unicode.sbl
@@ -0,0 +1,477 @@
+/* Stemmer for Turkish
+	* author: Evren (Kapusuz) Çilden
+	* email: evren.kapusuz at gmail.com
+	* version: 1.0 (15.01.2007)
+	
+
+	* stems nominal verb suffixes
+	* stems nominal inflections
+	* more than one syllable word check
+	* (y,n,s,U) context check
+	* vowel harmony check
+	* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)
+	
+	* The stemming algorithm is based on the paper "An Affix Stripping
+	* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
+	* Eşref Adalı (Proceedings of the IASTED International Conference
+	* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
+	* Innsbruck, Austria).
+	
+	* Turkish is an agglutinative language and has a very rich morphological
+	* structure. In Turkish, you can form many different words from a single stem
+	* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
+	* "You had been the doctor of him". The stem of the word is "doktor" and it
+	* takes three different suffixes -sU, -ymUs, and -sUnUz. The rules about
+	* the append order of suffixes can be clearly described as FSMs.
+	* The paper referenced above defines some FSMs for right to left
+	* morphological analysis. I generated a method for constructing snowball
+	* expressions from right to left FSMs for stemming suffixes.
+*/
+
+routines (
+	append_U_to_stems_ending_with_d_or_g // for preventing some overstemmings
+	check_vowel_harmony	// tests vowel harmony for suffixes
+	is_reserved_word	// tests whether current string is a reserved word ('ad','soyad')
+	mark_cAsInA		// nominal verb suffix
+	mark_DA			// noun suffix
+	mark_DAn		// noun suffix
+	mark_DUr		// nominal verb suffix
+	mark_ki			// noun suffix
+	mark_lAr		// noun suffix, nominal verb suffix
+	mark_lArI		// noun suffix
+	mark_nA			// noun suffix
+	mark_ncA		// noun suffix
+	mark_ndA		// noun suffix
+	mark_ndAn		// noun suffix
+	mark_nU			// noun suffix
+	mark_nUn		// noun suffix
+	mark_nUz		// nominal verb suffix
+	mark_sU			// noun suffix
+	mark_sUn		// nominal verb suffix
+	mark_sUnUz		// nominal verb suffix
+	mark_possessives	// -(U)m,-(U)n,-(U)mUz,-(U)nUz,
+	mark_yA			// noun suffix
+	mark_ylA		// noun suffix
+	mark_yU			// noun suffix
+	mark_yUm		// nominal verb suffix
+	mark_yUz		// nominal verb suffix
+	mark_yDU		// nominal verb suffix
+	mark_yken		// nominal verb suffix
+	mark_ymUs_		// nominal verb suffix
+	mark_ysA		// nominal verb suffix
+	
+	mark_suffix_with_optional_y_consonant
+	mark_suffix_with_optional_U_vowel
+	mark_suffix_with_optional_n_consonant
+	mark_suffix_with_optional_s_consonant
+	
+	more_than_one_syllable_word
+	
+	post_process_last_consonants
+	postlude
+
+	stem_nominal_verb_suffixes
+	stem_noun_suffixes
+	stem_suffix_chain_before_ki
+)
+
+/* Special characters in Unicode Latin-1 and Latin Extended-A */
+stringdef c.   	hex 'E7'	// LATIN SMALL LETTER C WITH CEDILLA
+stringdef g~   	hex '011F'	// LATIN SMALL LETTER G WITH BREVE
+stringdef i'   	hex '0131'	// LATIN SMALL LETTER DOTLESS I
+stringdef o"  	hex 'F6'	// LATIN SMALL LETTER O WITH DIAERESIS
+stringdef s.	hex '015F'	// LATIN SMALL LETTER S WITH CEDILLA
+stringdef u"  	hex 'FC'	// LATIN SMALL LETTER U WITH DIAERESIS
+
+stringescapes 	{ }
+
+integers 	( strlen )	// scratch value compared with limit in is_reserved_word
+
+booleans	( continue_stemming_noun_suffixes )	// set / unset in stem_nominal_verb_suffixes, tested in stem
+
+groupings 	( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)
+
+define vowel 	'ae{i'}io{o"}u{u"}'	// all vowels
+define U	'{i'}iu{u"}'	// the four vowels written as U in the routine names
+
+// the vowel grouping definitions below are used for checking vowel harmony
+define vowel1  	'a{i'}ou' 		// earlier vowels accepted when the last vowel is 'a'
+define vowel2  	'ei{o"}{u"}' 		// earlier vowels accepted when the last vowel is 'e'
+define vowel3  	'a{i'}' 		// earlier vowels accepted when the last vowel is dotless i
+define vowel4  	'ei'	 		// earlier vowels accepted when the last vowel is 'i'
+define vowel5  	'ou'	 		// earlier vowels accepted when the last vowel is 'o' or 'u'
+define vowel6  	'{o"}{u"}' 		// earlier vowels accepted when the last vowel is 'o"' or 'u"'
+
+externals 	( stem )
+
+backwardmode (
+	// checks vowel harmony for a possible suffix: takes the last vowel before the
+	// cursor and requires a vowel of the matching vowelN grouping somewhere before it;
+	// this rule is added to prevent over stemming
+	define check_vowel_harmony as (
+		test	// look-ahead only: the cursor is restored afterwards
+		(
+			(goto vowel)   // if there is a vowel
+			(
+				('a' goto vowel1) or
+				('e' goto vowel2) or
+				('{i'}' goto vowel3) or
+				('i' goto vowel4) or
+				('o' goto vowel5) or
+				('{o"}' goto vowel6) or
+				('u' goto vowel5) or
+				('{u"}' goto vowel6)
+			)
+		)
+	)
+	
+	// called with the cursor just before a matched suffix, which may follow a buffer 'n':
+	// - 'n' with a vowel before it: step over the 'n' so that it is deleted with the suffix
+	// - 'n' without a vowel before it: fail; no 'n': succeed only if a vowel precedes the previous character
+	// assumption: slice beginning is set correctly
+	define mark_suffix_with_optional_n_consonant as (
+		((test 'n') next (test vowel))
+		or
+		((not(test 'n')) test(next (test vowel)))
+
+	)
+	
+	// called with the cursor just before a matched suffix, which may follow a buffer 's':
+	// - 's' with a vowel before it: step over the 's' so that it is deleted with the suffix
+	// - 's' without a vowel before it: fail; no 's': succeed only if a vowel precedes the previous character
+	// assumption: slice beginning is set correctly
+	define mark_suffix_with_optional_s_consonant as (
+		((test 's') next (test vowel))
+		or
+		((not(test 's')) test(next (test vowel)))
+	)
+	
+	// called with the cursor just before a matched suffix, which may follow a buffer 'y':
+	// - 'y' with a vowel before it: step over the 'y' so that it is deleted with the suffix
+	// - 'y' without a vowel before it: fail; no 'y': succeed only if a vowel precedes the previous character
+	// assumption: slice beginning is set correctly
+	define mark_suffix_with_optional_y_consonant as (
+		((test 'y') next (test vowel))
+		or
+		((not(test 'y')) test(next (test vowel)))
+	)
+	
+	define mark_suffix_with_optional_U_vowel as (	// same scheme for a buffer U vowel, with non-vowel in place of vowel
+		((test U) next (test non-vowel))	// a U with a non-vowel before it: step over the U
+		or
+		((not(test U)) test(next (test non-vowel)))	// no U: a non-vowel must precede the previous character
+
+	)
+	
+	define mark_possessives as (	// endings -(U)m, -(U)n, -(U)mUz, -(U)nUz; no vowel harmony check here
+		among ( 'm' 'n'
+			'miz' 'm{i'}z' 'muz' 'm{u"}z' 'niz' 'n{i'}z' 'nuz' 'n{u"}z' )
+		mark_suffix_with_optional_U_vowel
+	)
+	
+	// noun suffix -(s)U: a single U vowel, with the optional buffer 's' before it
+	define mark_sU as (
+		check_vowel_harmony  U
+		mark_suffix_with_optional_s_consonant
+	)
+	
+	// noun suffix -lArI (no vowel harmony check)
+	define mark_lArI as
+		( among ( 'lar{i'}' 'leri' ) )
+	
+	// noun suffix -(y)U: a single U vowel, with the optional buffer 'y' before it
+	define mark_yU as (
+		check_vowel_harmony  U
+		mark_suffix_with_optional_y_consonant
+	)
+	
+	// noun suffix -nU
+	define mark_nU as (
+		check_vowel_harmony  among ( 'ni' 'n{i'}' 'nu' 'n{u"}' )
+	)
+	
+	// noun suffix -(n)Un, with the optional buffer 'n' before it
+	define mark_nUn as (
+		check_vowel_harmony  among ( 'in' '{i'}n' 'un' '{u"}n' )
+		mark_suffix_with_optional_n_consonant
+	)
+	
+	// noun suffix -(y)A, with the optional buffer 'y' before it
+	define mark_yA as (
+		check_vowel_harmony  among ( 'e' 'a' )
+		mark_suffix_with_optional_y_consonant
+	)
+	
+	// noun suffix -nA
+	define mark_nA as (
+		check_vowel_harmony  among ( 'ne' 'na' )
+	)
+	
+	// noun suffix -DA (d or t, then a or e)
+	define mark_DA as (
+		check_vowel_harmony  among ( 'de' 'da' 'te' 'ta' )
+	)
+	
+	// noun suffix -ndA
+	define mark_ndA as (
+		check_vowel_harmony  among ( 'nde' 'nda' )
+	)
+	
+	// noun suffix -DAn (d or t, then a or e, then n)
+	define mark_DAn as (
+		check_vowel_harmony  among ( 'den' 'dan' 'ten' 'tan' )
+	)
+	
+	// noun suffix -ndAn
+	define mark_ndAn as (
+		check_vowel_harmony  among ( 'nden' 'ndan' )
+	)
+	
+	// noun suffix -(y)lA, with the optional buffer 'y' before it
+	define mark_ylA as (
+		check_vowel_harmony  among ( 'le' 'la' )
+		mark_suffix_with_optional_y_consonant
+	)
+	
+	// noun suffix -ki (no vowel harmony check)
+	define mark_ki as
+		( 'ki' )
+	
+	// noun suffix -(n)cA, with the optional buffer 'n' before it
+	define mark_ncA as (
+		check_vowel_harmony  among ( 'ce' 'ca' )
+		mark_suffix_with_optional_n_consonant
+	)
+	
+	// nominal verb suffix -(y)Um, with the optional buffer 'y' before it
+	define mark_yUm as (
+		check_vowel_harmony  among ( 'im' '{i'}m' 'um' '{u"}m' )
+		mark_suffix_with_optional_y_consonant
+	)
+	
+	// nominal verb suffix -sUn
+	define mark_sUn as (
+		check_vowel_harmony  among ( 'sin' 's{i'}n' 'sun' 's{u"}n' )
+	)
+	
+	// nominal verb suffix -(y)Uz, with the optional buffer 'y' before it
+	define mark_yUz as (
+		check_vowel_harmony  among ( 'iz' '{i'}z' 'uz' '{u"}z' )
+		mark_suffix_with_optional_y_consonant
+	)
+	
+	// nominal verb suffix -sUnUz (no vowel harmony check)
+	define mark_sUnUz as
+		( among ( 'siniz' 's{i'}n{i'}z' 'sunuz' 's{u"}n{u"}z' ) )
+	
+	// -lAr: used both as a noun suffix and as a nominal verb suffix
+	define mark_lAr as (
+		check_vowel_harmony  among ( 'lar' 'ler' )
+	)
+	
+	// nominal verb suffix -nUz
+	define mark_nUz as (
+		check_vowel_harmony  among ( 'niz' 'n{i'}z' 'nuz' 'n{u"}z' )
+	)
+	
+	// nominal verb suffix -DUr (d or t, then a U vowel, then r)
+	define mark_DUr as (
+		check_vowel_harmony  among ( 'dir' 'd{i'}r' 'dur' 'd{u"}r' 'tir' 't{i'}r' 'tur' 't{u"}r' )
+	)
+	
+	// nominal verb suffix -cAsInA (no vowel harmony check)
+	define mark_cAsInA as
+		( among ( 'cesine' 'cas{i'}na' ) )
+	
+	// nominal verb suffix -(y)DU: d/t + U, bare or followed by m, n or k
+	define mark_yDU as ( check_vowel_harmony
+		among ( 'd{i'}' 'di' 'du' 'd{u"}' 'd{i'}m' 'dim' 'dum' 'd{u"}m'
+			'd{i'}n' 'din' 'dun' 'd{u"}n' 'd{i'}k' 'dik' 'duk' 'd{u"}k'
+			't{i'}' 'ti' 'tu' 't{u"}' 't{i'}m' 'tim' 'tum' 't{u"}m'
+			't{i'}n' 'tin' 'tun' 't{u"}n' 't{i'}k' 'tik' 'tuk' 't{u"}k' )
+		mark_suffix_with_optional_y_consonant
+	)
+
+	// nominal verb suffix -(y)sA; does not fully obey vowel harmony, and no harmony check is made
+	define mark_ysA as (
+		among ( 'sa' 'sam' 'san' 'sak'  'se' 'sem' 'sen' 'sek' )
+		mark_suffix_with_optional_y_consonant
+	)
+	
+	// nominal verb suffix -(y)mUs (s with cedilla), with the optional buffer 'y' before it
+	define mark_ymUs_ as (
+		check_vowel_harmony  among ( 'mi{s.}' 'm{i'}{s.}' 'mu{s.}' 'm{u"}{s.}' )
+		mark_suffix_with_optional_y_consonant
+	)
+	
+	// nominal verb suffix -(y)ken (no vowel harmony check)
+	define mark_yken as
+		( 'ken'  mark_suffix_with_optional_y_consonant )
+	
+	define stem_nominal_verb_suffixes as (	// removes a chain of nominal verb suffixes; the first alternative that matches wins
+		[	
+			set continue_stemming_noun_suffixes	// default: stem goes on to stem_noun_suffixes
+			(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
+			or
+			(mark_cAsInA (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_)
+			or
+			(
+				mark_lAr ] delete try([(mark_DUr or mark_yDU or mark_ysA or mark_ymUs_))
+				unset continue_stemming_noun_suffixes	// only this alternative clears the flag that stem tests
+			)
+			or
+			(mark_nUz (mark_yDU or mark_ysA))
+			or
+			((mark_sUnUz or mark_yUz or mark_sUn or mark_yUm) ] delete try([ mark_ymUs_))
+			or
+			(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
+		]delete	// deletes the text between the most recent [ and the cursor (possibly nothing)
+	)
+	
+	// stems noun suffix chains ending with -ki
+	define stem_suffix_chain_before_ki as (	// recursive: what precedes the chain may itself be a chain ending in -ki
+		[
+			mark_ki	// required; each alternative below then needs -DA, -nUn or -ndA before it
+			(
+				(mark_DA] delete try([	// -DA-ki is deleted as one slice, then optionally:
+					(mark_lAr] delete try(stem_suffix_chain_before_ki))
+					or
+					(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+					
+				))
+				or
+				(mark_nUn] delete try([	// -nUn-ki is deleted as one slice, then optionally:
+					(mark_lArI] delete)
+					or
+					([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+					or
+					(stem_suffix_chain_before_ki)
+				))
+				or
+				(mark_ndA (	
+					(mark_lArI] delete)	// here -ndA-ki goes in the same slice as the -lArI before it
+					or
+					((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))	// likewise with -sU
+					or
+					(stem_suffix_chain_before_ki)
+				))
+			)
+	)
+	
+	define stem_noun_suffixes as (	// removes a chain of noun suffixes; alternatives are tried in order, first success wins
+		([mark_lAr] delete try(stem_suffix_chain_before_ki))	// -lAr
+		or
+		([mark_ncA] delete	// -(n)cA
+			try(
+				([mark_lArI] delete)
+				or
+				([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+				or
+				([mark_lAr] delete stem_suffix_chain_before_ki)
+			)
+		)
+		or
+		([(mark_ndA or mark_nA)	// -ndA / -nA: nothing is deleted until one of the branches below matches
+			(
+		  		(mark_lArI] delete)
+		  		or
+		  		(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+		  		or
+		  		(stem_suffix_chain_before_ki)
+		  	)
+		)
+		or
+		([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))	// NOTE(review): the (mark_lArI) branch succeeds without any ] delete - confirm this is intended
+		or
+		( [mark_DAn] delete try ([	// -DAn
+			(
+		 		(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+		 		or
+		 		(mark_lAr] delete try(stem_suffix_chain_before_ki))
+		 		or
+		 		(stem_suffix_chain_before_ki)
+		 	))
+		)
+		or
+		([mark_nUn or mark_ylA] delete	// -(n)Un / -(y)lA
+			try(
+				([mark_lAr] delete stem_suffix_chain_before_ki)
+				or
+				([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+				or
+				stem_suffix_chain_before_ki
+			)
+		)
+		or
+		([mark_lArI] delete)	// -lArI
+		or	
+		(stem_suffix_chain_before_ki)	// a chain ending in -ki
+		or
+		([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))	// -DA / -(y)U / -(y)A
+		or
+		([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))	// possessive or -(s)U
+	)
+	
+	// convert the last consonant of the stem: b -> p, c -> {c.}, d -> t, {g~} -> k
+	define post_process_last_consonants as (
+		[substring] among ( '{g~}' (<- 'k')
+			'd' (<- 't')
+			'c' (<- '{c.}')
+			'b' (<- 'p')
+		)
+	)
+
+	// after stemming if the word ends with 'd' or 'g' most probably last U is overstemmed
+	// like in 'kedim' -> 'ked'
+	// Turkish words don't usually end with 'd' or 'g'
+	// some very well known words are ignored (like 'ad' 'soyad')
+	// appends U to stems ending with d or g, decides which vowel to add
+	// based on the last vowel in the stem
+	define append_U_to_stems_ending_with_d_or_g as (
+		test('d' or 'g')	// test: the cursor stays where it was
+		(test((goto vowel) 'a' or '{i'}') <+ '{i'}')	// last vowel a or dotless i: insert dotless i
+		or
+		(test((goto vowel) 'e' or 'i') <+ 'i')	// last vowel e or i: insert i
+		or
+		(test((goto vowel) 'o' or 'u') <+ 'u')	// last vowel o or u: insert u
+		or
+		(test((goto vowel) '{o"}' or '{u"}') <+ '{u"}')	// last vowel o" or u": insert u"
+	)
+	
+)
+
+// Tests whether the word has more than one syllable
+// In Turkish each vowel indicates a distinct syllable
+define more_than_one_syllable_word as (
+	test (atleast 2 (gopast vowel))	// at least two vowels can be passed; test leaves the cursor unchanged
+)
+
+define is_reserved_word as (	// succeeds when the current string is exactly 'ad' or 'soyad'
+	test(gopast 'ad' ($strlen = 2) ($strlen == limit))	// 'ad' occurs and limit equals 2
+	or
+	test(gopast 'soyad' ($strlen = 5) ($strlen == limit))	// 'soyad' occurs and limit equals 5
+)
+
+define postlude as (	// final fix-ups on the stem; fails, doing nothing, for the reserved words
+	not(is_reserved_word)
+	backwards (
+		do append_U_to_stems_ending_with_d_or_g	// each step is optional (do)
+		do post_process_last_consonants
+		
+	)
+)
+
+define stem as (	// entry point (listed in externals); fails at once unless the word has at least two vowels
+	(more_than_one_syllable_word)
+	(
+		backwards (
+			do stem_nominal_verb_suffixes
+			continue_stemming_noun_suffixes	// boolean test: when unset, backwards ( ... ) fails here
+			do stem_noun_suffixes
+		)
+		
+	postlude	// not reached when the backwards block above failed
+	)
+)
+
+
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-12-31 17:38:02 +0000
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-12-31 17:38:02 +0000
commit	2375dba898b481837879940dfdcf3ea85248fe01 (patch)
tree	cced5fb680e9a362d1de25630bff537865d38365 /contrib/snowball/algorithms
parent	1543c98d38ffb84a1e405081436d0a25bee713a6 (diff)
download	rspamd-2375dba898b481837879940dfdcf3ea85248fe01.tar.gz rspamd-2375dba898b481837879940dfdcf3ea85248fe01.zip