You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

modules.txt 2.8KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # This file contains a list of stemmers to include in the distribution.
  2. # The format is one stemmer per line; each line holds space-separated items:
  3. # First item is the name of the stemmer.
  4. # Second item is a comma-separated list of character sets.
  5. # Third item is a comma-separated list of names to refer to the stemmer by.
  6. #
  7. # Lines starting with a #, or blank lines, are ignored.
  8. # List all the main algorithms for each language, in UTF-8, and also with
  9. # the most commonly used encoding.
  10. arabic UTF_8 arabic,ar,ara
  11. danish UTF_8 danish,da,dan
  12. dutch UTF_8 dutch,nl,dut,nld
  13. english UTF_8 english,en,eng
  14. finnish UTF_8 finnish,fi,fin
  15. french UTF_8 french,fr,fre,fra
  16. german UTF_8 german,de,ger,deu
  17. greek UTF_8 greek,el,gre,ell
  18. hindi UTF_8 hindi,hi,hin
  19. hungarian UTF_8 hungarian,hu,hun
  20. indonesian UTF_8 indonesian,id,ind
  21. italian UTF_8 italian,it,ita
  22. lithuanian UTF_8 lithuanian,lt,lit
  23. nepali UTF_8 nepali,ne,nep
  24. norwegian UTF_8 norwegian,no,nor
  25. portuguese UTF_8 portuguese,pt,por
  26. romanian UTF_8 romanian,ro,rum,ron
  27. russian UTF_8 russian,ru,rus
  28. serbian UTF_8 serbian,sr,srp
  29. spanish UTF_8 spanish,es,esl,spa
  30. swedish UTF_8 swedish,sv,swe
  31. tamil UTF_8 tamil,ta,tam
  32. turkish UTF_8 turkish,tr,tur
  33. # Also include the traditional porter algorithm for english.
  34. # The porter algorithm is included in the libstemmer distribution to assist
  35. # with backwards compatibility, but for new systems the english algorithm
  36. # should be used in preference.
  37. porter UTF_8 porter english
  38. # Some other stemmers in the snowball project are not included in the standard
  39. # distribution. To compile a libstemmer with them in, add them to this list,
  40. # and regenerate the distribution. (You will need a full source checkout for
  41. # this.) They are included in the snowball website as curiosities, but are not
  42. # intended for general use, and use of them is not fully supported. These
  43. # algorithms are:
  44. #
  45. # german2 - This is a slight modification of the german stemmer.
  46. #german2 UTF_8,ISO_8859_1 german2 german
  47. #
  48. # kraaij_pohlmann - This is a different dutch stemmer.
  49. #kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann dutch
  50. #
  51. # lovins - This is an english stemmer, but fairly outdated, and
  52. # only really applicable to a restricted type of input text
  53. # (keywords in academic publications).
  54. #lovins UTF_8,ISO_8859_1 lovins english