diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-31 17:38:02 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-31 17:38:02 +0000 |
commit | 2375dba898b481837879940dfdcf3ea85248fe01 (patch) | |
tree | cced5fb680e9a362d1de25630bff537865d38365 /contrib/snowball/algorithms/german | |
parent | 1543c98d38ffb84a1e405081436d0a25bee713a6 (diff) | |
download | rspamd-2375dba898b481837879940dfdcf3ea85248fe01.tar.gz rspamd-2375dba898b481837879940dfdcf3ea85248fe01.zip |
Remove bloody submodules.
Diffstat (limited to 'contrib/snowball/algorithms/german')
-rw-r--r-- | contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl | 139 | ||||
-rw-r--r-- | contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl | 139 |
2 files changed, 278 insertions, 0 deletions
diff --git a/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl b/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl new file mode 100644 index 000000000..7069daf0d --- /dev/null +++ b/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl @@ -0,0 +1,139 @@ + +/* + Extra rule for -nisse ending added 11 Dec 2009 +*/ + +routines ( + prelude postlude + mark_regions + R1 R2 + standard_suffix +) + +externals ( stem ) + +integers ( p1 p2 x ) + +groupings ( v s_ending st_ending ) + +stringescapes {} + +/* special characters (in ISO Latin I) */ + +stringdef a" hex 'E4' +stringdef o" hex 'F6' +stringdef u" hex 'FC' +stringdef ss hex 'DF' + +define v 'aeiouy{a"}{o"}{u"}' + +define s_ending 'bdfghklmnrt' +define st_ending s_ending - 'r' + +define prelude as ( + + test repeat ( + ( + ['{ss}'] <- 'ss' + ) or next + ) + + repeat goto ( + v [('u'] v <- 'U') or + ('y'] v <- 'Y') + ) +) + +define mark_regions as ( + + $p1 = limit + $p2 = limit + + test(hop 3 setmark x) + + gopast v gopast non-v setmark p1 + try($p1 < x $p1 = x) // at least 3 + gopast v gopast non-v setmark p2 + +) + +define postlude as repeat ( + + [substring] among( + 'Y' (<- 'y') + 'U' (<- 'u') + '{a"}' (<- 'a') + '{o"}' (<- 'o') + '{u"}' (<- 'u') + '' (next) + ) + +) + +backwardmode ( + + define R1 as $p1 <= cursor + define R2 as $p2 <= cursor + + define standard_suffix as ( + do ( + [substring] R1 among( + 'em' 'ern' 'er' + ( delete + ) + 'e' 'en' 'es' + ( delete + try (['s'] 'nis' delete) + ) + 's' + ( s_ending delete + ) + ) + ) + do ( + [substring] R1 among( + 'en' 'er' 'est' + ( delete + ) + 'st' + ( st_ending hop 3 delete + ) + ) + ) + do ( + [substring] R2 among( + 'end' 'ung' + ( delete + try (['ig'] not 'e' R2 delete) + ) + 'ig' 'ik' 'isch' + ( not 'e' delete + ) + 'lich' 'heit' + ( delete + try ( + ['er' or 'en'] R1 delete + ) + ) + 'keit' + ( delete + try ( + [substring] R2 among( + 'lich' 'ig' + ( delete + ) + ) + ) + ) + ) + ) + ) +) + +define stem as ( + do prelude + do mark_regions + backwards + do standard_suffix + do postlude +) diff --git a/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl b/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl new file mode 100644 index 000000000..3effb3257 --- /dev/null +++ b/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl @@ -0,0 +1,139 @@ + +/* + Extra rule for -nisse ending added 11 Dec 2009 +*/ + +routines ( + prelude postlude + mark_regions + R1 R2 + standard_suffix +) + +externals ( stem ) + +integers ( p1 p2 x ) + +groupings ( v s_ending st_ending ) + +stringescapes {} + +/* special characters (in MS-DOS Latin I) */ + +stringdef a" hex '84' +stringdef o" hex '94' +stringdef u" hex '81' +stringdef ss hex 'E1' + +define v 'aeiouy{a"}{o"}{u"}' + +define s_ending 'bdfghklmnrt' +define st_ending s_ending - 'r' + +define prelude as ( + + test repeat ( + ( + ['{ss}'] <- 'ss' + ) or next + ) + + repeat goto ( + v [('u'] v <- 'U') or + ('y'] v <- 'Y') + ) +) + +define mark_regions as ( + + $p1 = limit + $p2 = limit + + test(hop 3 setmark x) + + gopast v gopast non-v setmark p1 + try($p1 < x $p1 = x) // at least 3 + gopast v gopast non-v setmark p2 + +) + +define postlude as repeat ( + + [substring] among( + 'Y' (<- 'y') + 'U' (<- 'u') + '{a"}' (<- 'a') + '{o"}' (<- 'o') + '{u"}' (<- 'u') + '' (next) + ) + +) + +backwardmode ( + + define R1 as $p1 <= cursor + define R2 as $p2 <= cursor + + define standard_suffix as ( + do ( + [substring] R1 among( + 'em' 'ern' 'er' + ( delete + ) + 'e' 'en' 'es' + ( delete + try (['s'] 'nis' delete) + ) + 's' + ( s_ending delete + ) + ) + ) + do ( + [substring] R1 among( + 'en' 'er' 'est' + ( delete + ) + 'st' + ( st_ending hop 3 delete + ) + ) + ) + do ( + [substring] R2 among( + 'end' 'ung' + ( delete + try (['ig'] not 'e' R2 delete) + ) + 'ig' 'ik' 'isch' + ( not 'e' delete + ) + 'lich' 'heit' + ( delete + try ( + ['er' or 'en'] R1 delete + ) + ) + 'keit' + ( delete + try ( + [substring] R2 among( + 'lich' 'ig' + ( delete + ) + ) + ) + ) + ) + ) + ) +) + +define stem as ( + do prelude + do mark_regions + backwards + do standard_suffix + do postlude +) |