mirror of
https://github.com/rspamd/rspamd.git
synced 2024-08-19 14:10:55 +02:00
59 lines
2.8 KiB
Plaintext
59 lines
2.8 KiB
Plaintext
# This file contains a list of stemmers to include in the distribution.
|
|
# The format is one entry per line, with space separated items - on each line:
|
|
# First item is name of stemmer.
|
|
# Second item is comma separated list of character sets.
|
|
# Third item is comma separated list of names to refer to the stemmer by.
|
|
#
|
|
# Lines starting with a #, or blank lines, are ignored.
|
|
|
|
# List all the main algorithms for each language, in UTF-8, and also with
|
|
# the most commonly used encoding.
|
|
|
|
arabic UTF_8 arabic,ar,ara
|
|
danish UTF_8 danish,da,dan
|
|
dutch UTF_8 dutch,nl,dut,nld
|
|
english UTF_8 english,en,eng
|
|
finnish UTF_8 finnish,fi,fin
|
|
french UTF_8 french,fr,fre,fra
|
|
german UTF_8 german,de,ger,deu
|
|
greek UTF_8 greek,el,gre,ell
|
|
hindi UTF_8 hindi,hi,hin
|
|
hungarian UTF_8 hungarian,hu,hun
|
|
indonesian UTF_8 indonesian,id,ind
|
|
italian UTF_8 italian,it,ita
|
|
lithuanian UTF_8 lithuanian,lt,lit
|
|
nepali UTF_8 nepali,ne,nep
|
|
norwegian UTF_8 norwegian,no,nor
|
|
portuguese UTF_8 portuguese,pt,por
|
|
romanian UTF_8 romanian,ro,rum,ron
|
|
russian UTF_8 russian,ru,rus
|
|
serbian UTF_8 serbian,sr,srp
|
|
spanish UTF_8 spanish,es,esl,spa
|
|
swedish UTF_8 swedish,sv,swe
|
|
tamil UTF_8 tamil,ta,tam
|
|
turkish UTF_8 turkish,tr,tur
|
|
|
|
# Also include the traditional porter algorithm for english.
|
|
# The porter algorithm is included in the libstemmer distribution to assist
|
|
# with backwards compatibility, but for new systems the english algorithm
|
|
# should be used in preference.
|
|
porter UTF_8 porter english
|
|
|
|
# Some other stemmers in the snowball project are not included in the standard
|
|
# distribution. To compile a libstemmer with them in, add them to this list,
|
|
# and regenerate the distribution. (You will need a full source checkout for
|
|
# this.) They are included in the snowball website as curiosities, but are not
|
|
# intended for general use, and use of them is not fully supported. These
|
|
# algorithms are:
|
|
#
|
|
# german2 - This is a slight modification of the german stemmer.
|
|
#german2 UTF_8,ISO_8859_1 german2 german
|
|
#
|
|
# kraaij_pohlmann - This is a different dutch stemmer.
|
|
#kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann dutch
|
|
#
|
|
# lovins - This is an english stemmer, but fairly outdated, and
|
|
# only really applicable to a restricted type of input text
|
|
# (keywords in academic publications).
|
|
#lovins UTF_8,ISO_8859_1 lovins english
|