aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--contrib/snowball/libstemmer/modules.txt50
-rw-r--r--contrib/snowball/libstemmer/modules_utf8.txt49
2 files changed, 99 insertions, 0 deletions
diff --git a/contrib/snowball/libstemmer/modules.txt b/contrib/snowball/libstemmer/modules.txt
new file mode 100644
index 000000000..d237c1d8d
--- /dev/null
+++ b/contrib/snowball/libstemmer/modules.txt
@@ -0,0 +1,50 @@
+# This file contains a list of stemmers to include in the distribution.
+# The format is a set of lines of space separated items - on each line:
+# First item is name of stemmer.
+# Second item is comma separated list of character sets.
+# Third item is comma separated list of names to refer to the stemmer by.
+#
+# Lines starting with a #, or blank lines, are ignored.
+
+# List all the main algorithms for each language, in UTF-8, and also with
+# the most commonly used encoding.
+
+danish UTF_8,ISO_8859_1 danish,da,dan
+dutch UTF_8,ISO_8859_1 dutch,nl,dut,nld
+english UTF_8,ISO_8859_1 english,en,eng
+finnish UTF_8,ISO_8859_1 finnish,fi,fin
+french UTF_8,ISO_8859_1 french,fr,fre,fra
+german UTF_8,ISO_8859_1 german,de,ger,deu
+hungarian UTF_8,ISO_8859_2 hungarian,hu,hun
+italian UTF_8,ISO_8859_1 italian,it,ita
+norwegian UTF_8,ISO_8859_1 norwegian,no,nor
+portuguese UTF_8,ISO_8859_1 portuguese,pt,por
+romanian UTF_8,ISO_8859_2 romanian,ro,rum,ron
+russian UTF_8,KOI8_R russian,ru,rus
+spanish UTF_8,ISO_8859_1 spanish,es,esl,spa
+swedish UTF_8,ISO_8859_1 swedish,sv,swe
+turkish UTF_8 turkish,tr,tur
+
+# Also include the traditional porter algorithm for english.
+# The porter algorithm is included in the libstemmer distribution to assist
+# with backwards compatibility, but for new systems the english algorithm
+# should be used in preference.
+porter UTF_8,ISO_8859_1 porter
+
+# Some other stemmers in the snowball project are not included in the standard
+# distribution. To compile a libstemmer with them in, add them to this list,
+# and regenerate the distribution. (You will need a full source checkout for
+# this.) They are included in the snowball website as curiosities, but are not
+# intended for general use, and use of them is not fully supported. These
+# algorithms are:
+#
+# german2 - This is a slight modification of the german stemmer.
+#german2 UTF_8,ISO_8859_1 german2
+#
+# kraaij_pohlmann - This is a different dutch stemmer.
+#kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann
+#
+# lovins - This is an english stemmer, but fairly outdated, and
+# only really applicable to a restricted type of input text
+# (keywords in academic publications).
+#lovins UTF_8,ISO_8859_1 lovins
diff --git a/contrib/snowball/libstemmer/modules_utf8.txt b/contrib/snowball/libstemmer/modules_utf8.txt
new file mode 100644
index 000000000..60a0e1d0c
--- /dev/null
+++ b/contrib/snowball/libstemmer/modules_utf8.txt
@@ -0,0 +1,49 @@
+# This file contains a list of stemmers to include in the distribution.
+# The format is a set of lines of space separated items - on each line:
+# First item is name of stemmer.
+# Second item is comma separated list of character sets.
+# Third item is comma separated list of names to refer to the stemmer by.
+#
+# Lines starting with a #, or blank lines, are ignored.
+
+# List all the main algorithms for each language, in UTF-8.
+
+danish UTF_8 danish,da,dan
+dutch UTF_8 dutch,nl,dut,nld
+english UTF_8 english,en,eng
+finnish UTF_8 finnish,fi,fin
+french UTF_8 french,fr,fre,fra
+german UTF_8 german,de,ger,deu
+hungarian UTF_8 hungarian,hu,hun
+italian UTF_8 italian,it,ita
+norwegian UTF_8 norwegian,no,nor
+portuguese UTF_8 portuguese,pt,por
+romanian UTF_8 romanian,ro,rum,ron
+russian UTF_8 russian,ru,rus
+spanish UTF_8 spanish,es,esl,spa
+swedish UTF_8 swedish,sv,swe
+turkish UTF_8 turkish,tr,tur
+
+# Also include the traditional porter algorithm for english.
+# The porter algorithm is included in the libstemmer distribution to assist
+# with backwards compatibility, but for new systems the english algorithm
+# should be used in preference.
+porter UTF_8 porter
+
+# Some other stemmers in the snowball project are not included in the standard
+# distribution. To compile a libstemmer with them in, add them to this list,
+# and regenerate the distribution. (You will need a full source checkout for
+# this.) They are included in the snowball website as curiosities, but are not
+# intended for general use, and use of them is not fully supported. These
+# algorithms are:
+#
+# german2 - This is a slight modification of the german stemmer.
+#german2 UTF_8 german2
+#
+# kraaij_pohlmann - This is a different dutch stemmer.
+#kraaij_pohlmann UTF_8 kraaij_pohlmann
+#
+# lovins - This is an english stemmer, but fairly outdated, and
+# only really applicable to a restricted type of input text
+# (keywords in academic publications).
+#lovins UTF_8 lovins