4 years ago · b87995255f
--- a/contrib/snowball/CMakeLists.txt
+++ b/contrib/snowball/CMakeLists.txt
@@ -1,20 +1,14 @@
 # End of configuration
 SET(LIBSTEM_ALGORITHMS danish dutch english finnish french german hungarian
 	italian norwegian porter portuguese romanian
 	russian spanish swedish turkish)
 SET(KOI8_ALGORITHMS russian)
 SET(ISO_8859_1_ALGORITHMS danish dutch english finnish french german italian
 			norwegian porter portuguese spanish swedish)
 SET(ISO_8859_2_ALGORITHMS hungarian romanian)
 SET(OTHER_ALGORITHMS german2 kraaij_pohlmann lovins)
 SET(ALL_ALGORITHMS ${LIBSTEM_ALGORITHMS} ${OTHER_ALGORITHMS})
 SET(LIBSTEM_ALGORITHMS arabic danish dutch english finnish french german greek hindi hungarian
 	indonesian italian lithuanian nepali norwegian porter portuguese romanian
 	russian serbian spanish swedish tamil turkish)
 SET(ALL_ALGORITHMS ${LIBSTEM_ALGORITHMS})

 SET(COMPILER_SOURCES compiler/space.c
 		   compiler/tokeniser.c
 		   compiler/analyser.c
 		   compiler/generator.c
 		   compiler/driver.c
 		   compiler/generator_java.c)
 		   compiler/driver.c)

 SET(SNOWBALL_RUNTIME runtime/api.c
 		   runtime/utilities.c)
@@ -24,9 +18,15 @@ SET(LIBSTEMMER_UTF8_SOURCES libstemmer/libstemmer_utf8.c)
 #LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
 #LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in

 SET(STEMWORDS_SOURCES examples/stemwords.c)
 # MODULES_H is the value substituted for the @MODULES_H@ placeholder when
 # libstemmer_c.in is turned into libstemmer.c in the build tree.  @ONLY limits
 # substitution to @VAR@ references, so any ${...} text in the template is
 # copied through untouched.
 SET(MODULES_H "modules.h")
 CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/libstemmer_c.in ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/libstemmer.c @ONLY)
 # Directory-wide preprocessor switches.  Going by their names they compile out
 # the non-C code generators of the snowball compiler - TODO confirm against
 # the compiler sources.
 ADD_DEFINITIONS(
 	-DDISABLE_JS
 	-DDISABLE_GO
 	-DDISABLE_JAVA
 	-DDISABLE_PYTHON
 	-DDISABLE_CSHARP
 	-DDISABLE_PASCAL
 	-DDISABLE_RUST)

 MACRO(gen_stem IN ENCODING)
 	FOREACH(_it ${IN})
@@ -34,7 +34,7 @@ MACRO(gen_stem IN ENCODING)
 		SET(_header "${_base}.h")
 		SET(_source "${_base}.c")
 		STRING(REPLACE "UTF_8" "Unicode" _in_enc "${ENCODING}")
 		SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_${_in_enc}.sbl")
 		SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}.sbl")
 		IF(${_in_enc} STREQUAL "Unicode" AND NOT EXISTS ${_input})
 			ADD_CUSTOM_COMMAND(OUTPUT ${_source}
 				COMMAND ${CMAKE_CURRENT_BINARY_DIR}/snowball  "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_ISO_8859_1.sbl" -o ${_base} -eprefix ${_it}_${ENCODING}_ -r ../runtime -u
@@ -57,7 +57,7 @@ INCLUDE_DIRECTORIES("include")
 ADD_EXECUTABLE(snowball ${COMPILER_SOURCES})

 ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h
 COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl libstemmer/modules.h libstemmer ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules.txt libstemmer/mkinc.mak)
 COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h libstemmer ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules.txt ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/mkinc.mak)
 ADD_CUSTOM_TARGET(modules DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h")

 SET(STEMMER_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/libstemmer.c")
@@ -65,13 +65,6 @@ ADD_CUSTOM_TARGET(stemmer_deps ALL)
 ADD_DEPENDENCIES(stemmer_deps modules)

 gen_stem("${LIBSTEM_ALGORITHMS}" "UTF_8")
 gen_stem("${KOI8_ALGORITHMS}" "KOI8_R")
 gen_stem("${ISO_8859_1_ALGORITHMS}" "ISO_8859_1")
 gen_stem("${ISO_8859_2_ALGORITHMS}" "ISO_8859_2")


 ADD_LIBRARY(stemmer ${LINK_TYPE} ${SNOWBALL_RUNTIME} ${STEMMER_SOURCES})
 ADD_DEPENDENCIES(stemmer stemmer_deps)

 ADD_EXECUTABLE(stemwords ${STEMWORDS_SOURCES})
 TARGET_LINK_LIBRARIES(stemwords stemmer)
--- a/contrib/snowball/GNUmakefile
+++ b/contrib/snowball/GNUmakefile
@@ -1,300 +0,0 @@
 # -*- makefile -*-

 c_src_dir = src_c
 java_src_main_dir = java/org/tartarus/snowball
 java_src_dir = $(java_src_main_dir)/ext

 libstemmer_algorithms = danish dutch english finnish french german hungarian \
 			italian \
 			norwegian porter portuguese romanian \
 			russian spanish swedish turkish

 KOI8_R_algorithms = russian
 ISO_8859_1_algorithms = danish dutch english finnish french german italian \
 			norwegian porter portuguese spanish swedish
 ISO_8859_2_algorithms = hungarian romanian

 other_algorithms = german2 kraaij_pohlmann lovins

 all_algorithms = $(libstemmer_algorithms) $(other_algorithms)

 COMPILER_SOURCES = compiler/space.c \
 		   compiler/tokeniser.c \
 		   compiler/analyser.c \
 		   compiler/generator.c \
 		   compiler/driver.c \
 		   compiler/generator_java.c
 COMPILER_HEADERS = compiler/header.h \
 		   compiler/syswords.h \
 		   compiler/syswords2.h

 RUNTIME_SOURCES  = runtime/api.c \
 		   runtime/utilities.c
 RUNTIME_HEADERS  = runtime/api.h \
 		   runtime/header.h

 JAVARUNTIME_SOURCES = java/org/tartarus/snowball/Among.java \
 		      java/org/tartarus/snowball/SnowballProgram.java \
 		      java/org/tartarus/snowball/SnowballStemmer.java \
 		      java/org/tartarus/snowball/TestApp.java

 LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
 LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
 LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
 LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in

 STEMWORDS_SOURCES = examples/stemwords.c

 ALL_ALGORITHM_FILES = $(all_algorithms:%=algorithms/%/stem*.sbl)
 C_LIB_SOURCES = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c) \
 		$(KOI8_R_algorithms:%=$(c_src_dir)/stem_KOI8_R_%.c) \
 		$(ISO_8859_1_algorithms:%=$(c_src_dir)/stem_ISO_8859_1_%.c) \
 		$(ISO_8859_2_algorithms:%=$(c_src_dir)/stem_ISO_8859_2_%.c)
 C_LIB_HEADERS = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h) \
 		$(KOI8_R_algorithms:%=$(c_src_dir)/stem_KOI8_R_%.h) \
 		$(ISO_8859_1_algorithms:%=$(c_src_dir)/stem_ISO_8859_1_%.h) \
 		$(ISO_8859_2_algorithms:%=$(c_src_dir)/stem_ISO_8859_2_%.h)
 C_OTHER_SOURCES = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c)
 C_OTHER_HEADERS = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h)
 JAVA_SOURCES = $(libstemmer_algorithms:%=$(java_src_dir)/%Stemmer.java)

 COMPILER_OBJECTS=$(COMPILER_SOURCES:.c=.o)
 RUNTIME_OBJECTS=$(RUNTIME_SOURCES:.c=.o)
 LIBSTEMMER_OBJECTS=$(LIBSTEMMER_SOURCES:.c=.o)
 LIBSTEMMER_UTF8_OBJECTS=$(LIBSTEMMER_UTF8_SOURCES:.c=.o)
 STEMWORDS_OBJECTS=$(STEMWORDS_SOURCES:.c=.o)
 C_LIB_OBJECTS = $(C_LIB_SOURCES:.c=.o)
 C_OTHER_OBJECTS = $(C_OTHER_SOURCES:.c=.o)
 JAVA_CLASSES = $(JAVA_SOURCES:.java=.class)
 JAVA_RUNTIME_CLASSES=$(JAVARUNTIME_SOURCES:.java=.class)

 CFLAGS=-Iinclude -O2
 CPPFLAGS=-W -Wall -Wmissing-prototypes -Wmissing-declarations

 all: snowball libstemmer.o stemwords $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS)

 clean:
 	rm -f $(COMPILER_OBJECTS) $(RUNTIME_OBJECTS) \
 	      $(LIBSTEMMER_OBJECTS) $(LIBSTEMMER_UTF8_OBJECTS) $(STEMWORDS_OBJECTS) snowball \
 	      libstemmer.o stemwords \
              libstemmer/modules.h \
              libstemmer/modules_utf8.h \
              snowball.splint \
 	      $(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
 	      $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
 	      $(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
              libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
              libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c
 	rm -rf dist
 	rmdir $(c_src_dir) || true

 snowball: $(COMPILER_OBJECTS)
 	$(CC) -o $@ $^

 $(COMPILER_OBJECTS): $(COMPILER_HEADERS)

 # Both libstemmer front-ends are generated from the same template; the only
 # difference is which modules header is substituted for @MODULES_H@.
 # $^ is the template (the sole prerequisite) and $@ is the generated .c file.
 libstemmer/libstemmer.c: libstemmer/libstemmer_c.in
 	sed 's/@MODULES_H@/modules.h/' $^ >$@

 # UTF-8-only variant: identical template, but includes modules_utf8.h.
 libstemmer/libstemmer_utf8.c: libstemmer/libstemmer_c.in
 	sed 's/@MODULES_H@/modules_utf8.h/' $^ >$@

 libstemmer/modules.h libstemmer/mkinc.mak: libstemmer/mkmodules.pl libstemmer/modules.txt
 	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules.txt libstemmer/mkinc.mak

 libstemmer/modules_utf8.h libstemmer/mkinc_utf8.mak: libstemmer/mkmodules.pl libstemmer/modules_utf8.txt
 	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules_utf8.txt libstemmer/mkinc_utf8.mak utf8

 libstemmer/libstemmer.o: libstemmer/modules.h $(C_LIB_HEADERS)

 libstemmer.o: libstemmer/libstemmer.o $(RUNTIME_OBJECTS) $(C_LIB_OBJECTS)
 	$(AR) -cru $@ $^

 stemwords: $(STEMWORDS_OBJECTS) libstemmer.o
 	$(CC) -o $@ $^

 # Pattern rule: when an algorithm directory has no stem_Unicode.sbl, make can
 # create one by copying that algorithm's stem_ISO_8859_1.sbl.  Note that the
 # copy is written into the source tree (algorithms/<name>/).
 algorithms/%/stem_Unicode.sbl: algorithms/%/stem_ISO_8859_1.sbl
 	cp $^ $@

 # Generate the UTF-8 C stemmer (.c and .h) for one algorithm from its
 # stem_Unicode.sbl using the freshly built ./snowball compiler.
 #   l - algorithm name, recovered from the prerequisite path ($<) by stripping
 #       the trailing /stem_Unicode.sbl and then everything up to the last '/'
 #   o - output base name passed to -o, $(c_src_dir)/stem_UTF_8_<l>
 # The command is echoed by hand because the recipe itself is silenced with @.
 # NOTE(review): -u presumably selects UTF-8 code generation - confirm against
 # the snowball compiler's option list.
 $(c_src_dir)/stem_UTF_8_%.c $(c_src_dir)/stem_UTF_8_%.h: algorithms/%/stem_Unicode.sbl snowball
 	@mkdir -p $(c_src_dir)
 	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
 	o="$(c_src_dir)/stem_UTF_8_$${l}"; \
 	echo "./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u"; \
 	./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u

 $(c_src_dir)/stem_KOI8_R_%.c $(c_src_dir)/stem_KOI8_R_%.h: algorithms/%/stem_KOI8_R.sbl snowball
 	@mkdir -p $(c_src_dir)
 	@l=`echo "$<" | sed 's!\(.*\)/stem_KOI8_R.sbl$$!\1!;s!^.*/!!'`; \
 	o="$(c_src_dir)/stem_KOI8_R_$${l}"; \
 	echo "./snowball $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime"; \
 	./snowball $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime

 $(c_src_dir)/stem_ISO_8859_1_%.c $(c_src_dir)/stem_ISO_8859_1_%.h: algorithms/%/stem_ISO_8859_1.sbl snowball
 	@mkdir -p $(c_src_dir)
 	@l=`echo "$<" | sed 's!\(.*\)/stem_ISO_8859_1.sbl$$!\1!;s!^.*/!!'`; \
 	o="$(c_src_dir)/stem_ISO_8859_1_$${l}"; \
 	echo "./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime"; \
 	./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime

 $(c_src_dir)/stem_ISO_8859_2_%.c $(c_src_dir)/stem_ISO_8859_2_%.h: algorithms/%/stem_ISO_8859_2.sbl snowball
 	@mkdir -p $(c_src_dir)
 	@l=`echo "$<" | sed 's!\(.*\)/stem_ISO_8859_2.sbl$$!\1!;s!^.*/!!'`; \
 	o="$(c_src_dir)/stem_ISO_8859_2_$${l}"; \
 	echo "./snowball $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime"; \
 	./snowball $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime

 $(c_src_dir)/stem_%.o: $(c_src_dir)/stem_%.c $(c_src_dir)/stem_%.h
 	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<

 $(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
 	@mkdir -p $(java_src_dir)
 	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
 	o="$(java_src_dir)/$${l}Stemmer"; \
 	echo "./snowball $< -j -o $${o} -p \"org.tartarus.snowball.SnowballStemmer\" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer"; \
 	./snowball $< -j -o $${o} -p "org.tartarus.snowball.SnowballStemmer" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer

 # Static analysis: `make splint` runs splint with -weak over all compiler
 # sources ($^) and stores the report in snowball.splint ($@).
 splint: snowball.splint
 snowball.splint: $(COMPILER_SOURCES)
 	splint $^ >$@ -weak

 # Make a full source distribution
 dist: dist_snowball dist_libstemmer_c dist_libstemmer_java

 # Make a distribution of all the sources involved in snowball
 dist_snowball: $(COMPILER_SOURCES) $(COMPILER_HEADERS) \
 	    $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
 	    $(LIBSTEMMER_SOURCES) \
 	    $(LIBSTEMMER_UTF8_SOURCES) \
            $(LIBSTEMMER_HEADERS) \
 	    $(LIBSTEMMER_EXTRA) \
 	    $(ALL_ALGORITHM_FILES) $(STEMWORDS_SOURCES) \
 	    GNUmakefile README doc/TODO libstemmer/mkmodules.pl
 	destname=snowball_code; \
 	dest=dist/$${destname}; \
 	rm -rf $${dest} && \
 	rm -f $${dest}.tgz && \
 	for file in $^; do \
 	  dir=`dirname $$file` && \
 	  mkdir -p $${dest}/$${dir} && \
 	  cp -a $${file} $${dest}/$${dir} || exit 1 ; \
 	done && \
 	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
 	rm -rf $${dest}

 # Make a distribution of all the sources required to compile the C library.
 dist_libstemmer_c: \
            $(RUNTIME_SOURCES) \
            $(RUNTIME_HEADERS) \
            $(LIBSTEMMER_SOURCES) \
            $(LIBSTEMMER_UTF8_SOURCES) \
            $(LIBSTEMMER_HEADERS) \
            $(LIBSTEMMER_EXTRA) \
 	    $(C_LIB_SOURCES) \
            $(C_LIB_HEADERS) \
            libstemmer/mkinc.mak \
            libstemmer/mkinc_utf8.mak
 	destname=libstemmer_c; \
 	dest=dist/$${destname}; \
 	rm -rf $${dest} && \
 	rm -f $${dest}.tgz && \
 	mkdir -p $${dest} && \
 	cp -a doc/libstemmer_c_README $${dest}/README && \
 	mkdir -p $${dest}/examples && \
 	cp -a examples/stemwords.c $${dest}/examples && \
 	mkdir -p $${dest}/$(c_src_dir) && \
 	cp -a $(C_LIB_SOURCES) $(C_LIB_HEADERS) $${dest}/$(c_src_dir) && \
 	mkdir -p $${dest}/runtime && \
 	cp -a $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) $${dest}/runtime && \
 	mkdir -p $${dest}/libstemmer && \
 	cp -a $(LIBSTEMMER_SOURCES) $(LIBSTEMMER_UTF8_SOURCES) $(LIBSTEMMER_HEADERS) $(LIBSTEMMER_EXTRA) $${dest}/libstemmer && \
 	mkdir -p $${dest}/include && \
 	mv $${dest}/libstemmer/libstemmer.h $${dest}/include && \
 	(cd $${dest} && \
 	 echo "README" >> MANIFEST && \
 	 ls $(c_src_dir)/*.c $(c_src_dir)/*.h >> MANIFEST && \
 	 ls runtime/*.c runtime/*.h >> MANIFEST && \
 	 ls libstemmer/*.c libstemmer/*.h >> MANIFEST && \
 	 ls include/*.h >> MANIFEST) && \
        cp -a libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak $${dest}/ && \
 	echo 'include mkinc.mak' >> $${dest}/Makefile && \
 	echo 'CFLAGS=-Iinclude' >> $${dest}/Makefile && \
 	echo 'all: libstemmer.o stemwords' >> $${dest}/Makefile && \
 	echo 'libstemmer.o: $$(snowball_sources:.c=.o)' >> $${dest}/Makefile && \
 	echo '	$$(AR) -cru $$@ $$^' >> $${dest}/Makefile && \
 	echo 'stemwords: examples/stemwords.o libstemmer.o' >> $${dest}/Makefile && \
 	echo '	$$(CC) -o $$@ $$^' >> $${dest}/Makefile && \
 	echo 'clean:' >> $${dest}/Makefile && \
 	echo '	rm -f stemwords *.o $(c_src_dir)/*.o runtime/*.o libstemmer/*.o' >> $${dest}/Makefile && \
 	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
 	rm -rf $${dest}

 # Make a distribution of all the sources required to compile the Java library.
 dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
            $(LIBSTEMMER_EXTRA) \
 	    $(JAVA_SOURCES)
 	destname=libstemmer_java; \
 	dest=dist/$${destname}; \
 	rm -rf $${dest} && \
 	rm -f $${dest}.tgz && \
 	mkdir -p $${dest} && \
 	cp -a doc/libstemmer_java_README $${dest}/README && \
 	mkdir -p $${dest}/$(java_src_dir) && \
 	cp -a $(JAVA_SOURCES) $${dest}/$(java_src_dir) && \
 	mkdir -p $${dest}/$(java_src_main_dir) && \
 	cp -a $(JAVARUNTIME_SOURCES) $${dest}/$(java_src_main_dir) && \
 	(cd $${dest} && \
 	 echo "README" >> MANIFEST && \
 	 ls $(java_src_dir)/*.java >> MANIFEST && \
 	 ls $(java_src_main_dir)/*.java >> MANIFEST) && \
 	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
 	rm -rf $${dest}

 check: check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r

 check_utf8: $(libstemmer_algorithms:%=check_utf8_%)

 check_iso_8859_1: $(ISO_8859_1_algorithms:%=check_iso_8859_1_%)

 check_iso_8859_2: $(ISO_8859_2_algorithms:%=check_iso_8859_2_%)

 check_koi8r: $(KOI8_R_algorithms:%=check_koi8r_%)

 # Where the data files are located - assumed their repo is checked out as
 # a sibling to this one.
 STEMMING_DATA = ../snowball-data

 # Regression test for one algorithm in UTF-8.
 # $< is the algorithm's data directory, $(STEMMING_DATA)/<name>; the sed
 # expression strips everything up to the last '/' to recover <name>.
 # voc.txt is stemmed into tmp.txt and compared with output.txt.  If the
 # directory also has a diffs.txt, stemwords is rerun with -p2 and that output
 # is compared with diffs.txt.
 # A failing diff aborts the recipe, so tmp.txt is only removed on success.
 # NOTE(review): the exact output format selected by -p2 is not visible here -
 # confirm against examples/stemwords.c.
 check_utf8_%: $(STEMMING_DATA)/% stemwords
 	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with UTF-8"
 	@./stemwords -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt
 	@diff -u $</output.txt tmp.txt
 	@if [ -e $</diffs.txt ] ; \
 	then \
 	  ./stemwords -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt -p2 && \
 	  diff -u $</diffs.txt tmp.txt; \
 	fi
 	@rm tmp.txt

 # Regression tests for the single-byte encodings.  The test data is stored as
 # UTF-8, so both voc.txt and the expected output.txt are transcoded with an
 # inline Python one-liner: the transcoded vocabulary is piped into stemwords
 # (which writes tmp.txt) and the transcoded expected output is piped into diff.
 # NOTE(review): `open(...).read().decode("utf8")` relies on Python 2 str
 # semantics; Python 3 str objects have no decode() method, so confirm which
 # interpreter `python` resolves to before relying on these targets.

 # ISO-8859-1 variant.
 check_iso_8859_1_%: $(STEMMING_DATA)/% stemwords
 	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_1"
 	@python -c 'print(open("$</voc.txt").read().decode("utf8").encode("iso8859-1"))' | \
 	    ./stemwords -c ISO_8859_1 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
 	@python -c 'print(open("$</output.txt").read().decode("utf8").encode("iso8859-1"))' | \
 	    diff -u - tmp.txt
 	@rm tmp.txt

 # ISO-8859-2 variant.
 check_iso_8859_2_%: $(STEMMING_DATA)/% stemwords
 	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_2"
 	@python -c 'print(open("$</voc.txt").read().decode("utf8").encode("iso8859-2"))' | \
 	    ./stemwords -c ISO_8859_2 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
 	@python -c 'print(open("$</output.txt").read().decode("utf8").encode("iso8859-2"))' | \
 	    diff -u - tmp.txt
 	@rm tmp.txt

 # KOI8-R variant.
 check_koi8r_%: $(STEMMING_DATA)/% stemwords
 	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with KOI8R"
 	@python -c 'print(open("$</voc.txt").read().decode("utf8").encode("koi8_r"))' | \
 	    ./stemwords -c KOI8_R -l `echo $<|sed 's!.*/!!'` -o tmp.txt
 	@python -c 'print(open("$</output.txt").read().decode("utf8").encode("koi8_r"))' | \
 	    diff -u - tmp.txt
 	@rm tmp.txt
--- a/contrib/snowball/NEWS
+++ b/contrib/snowball/NEWS
@@ -0,0 +1,407 @@
 Snowball 2.0.0 (2019-10-02)
 ===========================

 C/C++
 -----

 * Fully handle 4-byte UTF-8 sequences.  Previously `hop` and `next` handled
  sequences of any length, but commands which look at the character value only
  handled sequences up to length 3.  Fixes #89.

 * Fix handling of a 3-byte UTF-8 sequence in a grouping in `backwardmode`.

 Java
 ----

 * TestApp.java:

  - Always use UTF-8 for I/O.  Patch from David Corbett (#80).

  - Allow reading input from stdin.

  - Remove rather pointless "stem n times" feature.

  - Only lower case ASCII to match stemwords.c.

  - Stem empty lines too to match stemwords.c.

 Code Quality Improvements
 -------------------------

 * Fix various warnings from newer compilers.

 * Improve use of `const`.

 * Share common functions between compiler backends rather than having multiple
  copies of the same code.

 * Assorted code clean-up.

 * Initialise line_labelled member of struct generator to 0.  Previously we were
  invoking undefined behaviour, though in practice it'll be zero initialised on
  most platforms.

 New Code Generators
 -------------------

 * Add Python generator (#24).  Originally written by Yoshiki Shibukawa, with
  additional updates by Dmitry Shachnev.

 * Add Javascript generator.  Based on JSX generator (#26) written by Yoshiki
  Shibukawa.

 * Add Rust generator from Jakob Demler (#51).

 * Add Go generator from Marty Schoch (#57).

 * Add C# generator.  Based on patch from Cesar Souza (#16, #17).

 * Add Pascal generator.  Based on Delphi backend from stemming.zip file on old
  website (#75).

 New Language Features
 ---------------------

 * Add `len` and `lenof` to measure Unicode length.  These are similar to `size`
  and `sizeof` (respectively), but `size` and `sizeof` return the length in
  bytes under `-utf8`, whereas these new commands give the same result whether
  using `-utf8`, `-widechars` or neither (but under `-utf8` they are O(n) in
  the length of the string).  For compatibility with existing code which might
  use these as variable or function names, they stop being treated as tokens if
  declared to be a variable or function.

 * New `{U+1234}` stringdef notation for Unicode codepoints.

 * More versatile integer tests.  Now you can compare any two arithmetic
  expressions with a relational operator in parentheses after the `$`, so for
  example `$(len > 3)` can now be used when previously a temporary variable was
  required: `$tmp = len $tmp > 3`

 Code generation improvements
 ----------------------------

 * General:

  + Avoid unnecessary saving and restoring of the cursor for more commands -
    `atlimit`, `do`, `set` and `unset` all leave the cursor alone or always
    restore its value, and for C `booltest` (which other languages already
    handled).

  + Special case handling for `setlimit tomark AE`.  All uses of setlimit in
    the current stemmers we ship follow this pattern, and by special-casing we
    can avoid having to save and restore the cursor (#74).

  + Merge duplicate actions in the same `among`.  This reduces the size of the
    switch/if-chain in the generated code which dispatch the among for many of
    the stemmers.

  + Generate simpler code for `among`.  We always check for a zero return value
    when we call the among, so there's no point also checking for that in the
    switch/if-chain.  We can also avoid the switch/if-chain entirely when
    there's only one possible outcome (besides the zero return).

  + Optimise code generated for `do <function call>`.  This speeds up "make
    check_python" by about 2%, and should speed up other interpreted languages
    too (#110).

  + Generate more and better comments referencing snowball source.

  + Add homepage URL and compiler version as comments in generated files.

 * C/C++:

  + Fix `size` and `sizeof` to not report one too high (reported by Assem
    Chelli in #32).

  + If signal `f` from a function call would lead to return from the current
    function then handle this and bailing out on an error together with a
    simple `if (ret <= 0) return ret;`

  + Inline testing for single character literals.

  + Avoid generating `|| 0` in a corner case - this can result in a compiler
    warning when building the generated code.

  + Implement `insert_v()` in terms of `insert_s()`.

  + Add conditional `extern "C"` so `runtime/api.h` can be included from C++
    code.  Closes #90, reported by vvarma.

 * Java:

  + Fix functions in `among` to work in Java.  We seem to need to make the
    methods called from among `public` instead of `private`, and to call them
    on `this` instead of the `methodObject` (which is cleaner anyway).  No
    revision in version control seems to generate working code for this case,
    but Richard says it definitely used to work - possibly older JVMs failed to
    correctly enforce the access controls when methods were invoked by
    reflection.

  + Code after handling `f` by returning from the current function is
    unreachable too.

  + Previously we incorrectly decided that code after an `or` was
    unreachable in certain cases.  None of the current stemmers in the
    distribution triggered this, but Martin Porter's snowball version
    of the Schinke Latin stemmer does.  Fixes #58, reported by Alexander
    Myltsev.

  + The reachability logic was failing to consider reachability from
    the final command in an `or`.  Fixes #82, reported by David Corbett.

  + Fix `maxint` and `minint`.  Patch from David Corbett in #31.

  + Fix `$` on strings.  The previous generated code was just wrong.  This
    doesn't affect any of the included algorithms, but for example breaks
    Martin Porter's snowball implementation of Schinke's Latin Stemmer.
    Issue noted by Jakob Demler while working on the Rust backend in #51,
    and reported in the Schinke's Latin Stemmer by Alexander Myltsev
    in #58.

  + Make SnowballProgram objects serializable.  Patch from Oleg Smirnov in #43.

  + Eliminate range-check implementation for groupings.  This was removed from
    the C generator 10 years earlier, isn't used for any of the existing
    algorithms, and it doesn't seem likely it would be - the grouping would
    have to consist entirely of a contiguous block of Unicode code-points.

  + Simplify code generated for `repeat` and `atleast`.

  + Eliminate unused return values and variables from runtime functions.

  + Only import the `among` and `SnowballProgram` classes if they're actually
    used.

  + Only generate `copy_from()` method if it's used.

  + Merge runtime functions `eq_s` and `eq_v`.

  + Java arrays know their own length so stop storing it separately.

  + Escape char 127 (DEL) in generated Java code.  It's unlikely that this
    character would actually be used in a real stemmer, so this was more of a
    theoretical bug.

  + Drop unused import of InvocationTargetException from SnowballStemmer.
    Reported by GerritDeMeulder in #72.

  + Fix lint check issues in generated Java code.  The stemmer classes are only
    referenced in the example app via reflection, so add
    @SuppressWarnings("unused") for them.  The stemmer classes override
    equals() and hashCode() methods from the standard java Object class, so
    mark these with @Override.  Both suggested by GerritDeMeulder in #72.

  + Declare Java variables at point of use in generated code.  Putting all
    declarations at the top of the function was adding unnecessary complexity
    to the Java generator code for no benefit.

  + Improve formatting of generated code.

 New stemming algorithms
 -----------------------

 * Add Tamil stemmer from Damodharan Rajalingam (#2, #3).

 * Add Arabic stemmer from Assem Chelli (#32, #50).

 * Add Irish stemmer from Jim O'Regan (#48).

 * Add Nepali stemmer from Arthur Zakirov (#70).

 * Add Indonesian stemmer from Olly Betts (#71).

 * Add Hindi stemmer from Olly Betts (#73). Thanks to David Corbett for review.

 * Add Lithuanian stemmer from Dainius Jocas (#22, #76).

 * Add Greek stemmer from Oleg Smirnov (#44).

 * Add Catalan and Basque stemmers from Israel Olalla (#104).

 Behavioural changes to existing algorithms
 ------------------------------------------

 * Portuguese:

  + Replace incorrect Spanish suffixes by Portuguese suffixes (#1).

 * French:

  + The MSDOS CP850 version of the French algorithm was missing changes present
    in the ISO8859-1 and Unicode versions.  There's now a single version of
    each algorithm which was based on the Unicode version.

  + Recognize French suffixes even when they begin with diaereses.  Patch from
    David Corbett in #78.

 * Russian:

  + We now normalise 'ё' to 'е' before stemming.  The documentation has long
    said "we assume ['ё'] is mapped into ['е']" but it's more convenient for
    the stemmer to actually perform this normalisation.  This change has no
    effect if the caller is already normalising as we recommend.  It's a change
    in behaviour if they aren't, but 'ё' occurs rarely (there are currently no
    instances in our test vocabulary) and this improves behaviour when it does
    occur.  Patch from Eugene Mirotin (#65, #68).

 * Finnish:

  + Adjust the Finnish algorithm not to mangle numbers.  This change also
    means it tends to leave foreign words alone.  Fixes #66.

 * Danish:

  + Adjust Danish algorithm not to mangle alphanumeric codes. In particular
    alphanumeric codes ending in a double digit (e.g. 0x0e00, hal9000,
    space1999) are no longer mangled.  See #81.

 Optimisations to existing algorithms
 ------------------------------------

 * Turkish:

  + Simplify uses of `test` in stemmer code.

  + Check for 'ad' or 'soyad' more efficiently, and without needing the
    strlen variable.  This speeds up "make check_utf8_turkish" by 11%
    on x86 Linux.

 * Kraaij-Pohlmann:

  + Eliminate variable x - `$p1 <= cursor` is simpler and a little more efficient
    than `setmark x $x >= p1`.

 Code clarity improvements to existing algorithms
 ------------------------------------------------

 * Turkish:

  + Use , for cedilla to match the conventions used in other stemmers.

 * Kraaij-Pohlmann:

  + Avoid cryptic `[among ( (])` ... `)` construct - instead use the same
    `[substring] among (` ... `)` construct we do in other stemmers.

 Compiler
 --------

 * Support conventional --help and --version options.

 * Warn if -r or -ep used with backend other than C/C++.

 * Warn if encoding command line options are specified when generating code in a
  language with a fixed encoding.

 * The default classname is now set based on the output filename, so `-n` is now
  often no longer needed.  Fixes #64.

 * Avoid potential one byte buffer over-read when parsing snowball code.

 * Avoid comparing with uninitialised array element during compilation.

 * Improve `-syntax` output for `setlimit L for C`.

 * Optimise away double negation so generators don't have to worry about
  generating `--` (decrement operator in many languages).  Fixes #52, reported
  by David Corbett.

 * Improved compiler error and warning messages:

  - We now report FILE:LINE: before each diagnostic message.

  - Improve warnings for unused declarations/definitions.

  - Warn for variables which are used, but either never initialised
    or never read.

  - Flag non-ASCII literal strings.  This is an error for wide Unicode, but
    only a warning for single-byte and UTF-8 which work so long as the source
    encoding matches the encoding used in the generated stemmer code.

  - Improve error recovery after an undeclared `define`.  We now sniff the
    token after the identifier and if it is `as` we parse as a routine,
    otherwise we parse as a grouping.  Previously we always just assumed it was
    a routine, which gave a confusing second error if it was a grouping.

  - Improve error recovery after an unexpected token in `among`.  Previously
    we acted as if the unexpected token closed the `among` (this probably
    wasn't intended but just a missing `break;` in a switch statement).  Now we
    issue an error and try the next token.

 * Report error instead of silently truncating character values (e.g. `hex 123`
  previously silently became byte 0x23 which is `#` rather than a
  g-with-cedilla).

 * Enlarge the initial input buffer size to 8192 bytes and double each time we
  hit the end.  Snowball programs are typically a few KB in size (with the
  current largest we ship being the Greek stemmer at 27KB) so the previous
  approach of starting with a 10 byte input buffer and increasing its size by
  50% plus 40 bytes each time it filled was inefficient, needing up to 15
  reallocations to load greek.sbl.

 * Identify variables only used by one `routine`/`external`.  This information
  isn't yet used, but such variables which are also always written to before
  being read can be emitted as local variables in most target languages.

 * We now allow multiple source files on the command line, and allow them to be
  after (or even interspersed with) options to better match modern Unix
  conventions.  Support for multiple source files allows specifying a single
  byte character set mapping via a source file of `stringdef`.

 * Avoid infinite recursion in compiler when optimising a recursive snowball
  function.  Recursive functions aren't typical in snowball programs, but
  the compiler shouldn't crash for any input, especially not a valid one.
  We now simply limit how deep the compiler will recurse and make the
  pessimistic assumption in the unlikely event we hit this limit.

 Build system:

 * `make clean` in C libstemmer_c distribution now removes `examples/*.o`.
  (#59)

 * Fix all the places which previously had to have a list of stemmers to work
  dynamically or be generated, so now only modules.txt needs updating to add
  a new stemmer.

 * Add check_java make target which runs tests for java.

 * Support gzipped test data (the uncompressed arabic test data is too big for
  github).

 * GNUmakefile: Drop useless `-eprefix` and `-r` options from snowball
  invocations for Java - these are only meaningful when generating C code.

 * Pass CFLAGS when linking which matches convention (e.g. automake does it) and
  facilitates use of tools such as ASan.  Fixes #84, reported by Thomas
  Pointhuber.

 * Add CI builds with -std=c90 to check compiler and generated code are C90
  (#54)

 libstemmer stuff:

 * Split out CPPFLAGS from CFLAGS and use CFLAGS when linking stemwords.

 * Add -O2 to CFLAGS.

 * Make generated tables of encodings and modules const.

 * Fix clang static analyzer memory leak warning (in practice this code path
  can never actually be taken).  Patch from Patrick O. Perry (#56)

 documentation

 * Added copyright and licensing details (#10).

 * Document that libstemmer supports ISO_8859_2 encoding.  Currently hungarian
  and romanian are available in ISO_8859_2.

 * Remove documentation falsely claiming that libstemmer supports CP850
  encoding.

 * CONTRIBUTING.rst: Add guidance for contributing new stemming algorithms and
  new language backends.

 * Overhaul libstemmer_python_README.  Most notably, replace the benchmark data
  which was very out of date.
--- a/contrib/snowball/algorithms/arabic.sbl
+++ b/contrib/snowball/algorithms/arabic.sbl
@@ -0,0 +1,561 @@
 /*
 * Authors:
 * - Assem Chelli, < assem [dot] ch [at] gmail >
 * - Abdelkrim Aries <ab [underscore] aries [at] esi [dot] dz>
 *
 */

 stringescapes { }

 /* the Arabic letters in Unicode */
 // Hamza
 stringdef o     '{U+0621}' // Hamza
 stringdef ao    '{U+0623}' // Hamza above Alef
 stringdef ao_   '{U+0625}' // Hamza below Alef
 stringdef a~    '{U+0622}' // Alef madda
 stringdef wo    '{U+0624}' // Hamza above waw
 stringdef yo    '{U+0626}' // Hamza above yeh

 // Letters
 stringdef a     '{U+0627}' // Alef
 stringdef a_    '{U+0649}' // Alef Maksura
 stringdef b     '{U+0628}' // Beh
 stringdef t_    '{U+0629}' // Teh_Marbuta
 stringdef t     '{U+062A}' // Teh
 stringdef th    '{U+062B}' // Theh
 stringdef j     '{U+062C}' // Jeem
 stringdef h     '{U+062D}' // Hah
 stringdef x     '{U+062E}' // Khah
 stringdef d     '{U+062F}' // Dal
 stringdef dz    '{U+0630}' // Thal
 stringdef r     '{U+0631}' // Reh
 stringdef z     '{U+0632}' // Zain
 stringdef s     '{U+0633}' // Seen
 stringdef sh    '{U+0634}' // Sheen
 stringdef c     '{U+0635}' // Sad
 stringdef dh    '{U+0636}' // Dad
 stringdef tt    '{U+0637}' // Tah
 stringdef zh    '{U+0638}' // Zah
 stringdef i     '{U+0639}' // Ain
 stringdef gh    '{U+063A}' // Ghain
 stringdef f     '{U+0641}' // Feh
 stringdef q     '{U+0642}' // Qaf
 stringdef k     '{U+0643}' // Kaf
 stringdef l     '{U+0644}' // Lam
 stringdef m     '{U+0645}' // Meem
 stringdef n     '{U+0646}' // Noon
 stringdef e     '{U+0647}' // Heh
 stringdef w     '{U+0648}' // Waw
 stringdef y     '{U+064A}' // Yeh

 // Diacritics
 stringdef aan   '{U+064B}' // FatHatan
 stringdef uun   '{U+064C}' // Dammatan
 stringdef iin   '{U+064D}' // Kasratan
 stringdef aa    '{U+064E}' // FatHa
 stringdef uu    '{U+064F}' // Damma
 stringdef ii    '{U+0650}' // Kasra
 stringdef oo    '{U+0652}' // Sukun
 stringdef ~     '{U+0651}' // Shadda

 // Hindu–Arabic numerals
 stringdef 0     '{U+0660}'
 stringdef 1     '{U+0661}'
 stringdef 2     '{U+0662}'
 stringdef 3     '{U+0663}'
 stringdef 4     '{U+0664}'
 stringdef 5     '{U+0665}'
 stringdef 6     '{U+0666}'
 stringdef 7     '{U+0667}'
 stringdef 8     '{U+0668}'
 stringdef 9     '{U+0669}'


 // Kasheeda
 stringdef _     '{U+0640}' // Kasheeda, Tatweel

 // Shaped forms
 stringdef o1     '{U+FE80}'  // HAMZA
 stringdef ao1    '{U+FE83}'  // ALEF_HAMZA_ABOVE
 stringdef ao2    '{U+FE84}'  // ALEF_HAMZA_ABOVE
 stringdef ao_1   '{U+FE87}'  // ALEF_HAMZA_BELOW
 stringdef ao_2   '{U+FE88}'  // ALEF_HAMZA_BELOW
 stringdef yo1    '{U+FE8B}'  // YEH_HAMZA
 stringdef yo2    '{U+FE8C}'  // YEH_HAMZA
 stringdef yo3    '{U+FE89}'  // YEH_HAMZA
 stringdef yo4    '{U+FE8A}'  // YEH_HAMZA
 stringdef a~1    '{U+FE81}'  // ALEF_MADDA
 stringdef a~2    '{U+FE82}'  // ALEF_MADDA
 stringdef wo1    '{U+FE85}'  // WAW_HAMZA
 stringdef wo2    '{U+FE86}'  // WAW_HAMZA
 stringdef a1     '{U+FE8D}'  // ALEF
 stringdef a2     '{U+FE8E}'  // ALEF
 stringdef b1     '{U+FE8F}'  // BEH
 stringdef b2     '{U+FE90}'  // BEH
 stringdef b3     '{U+FE91}'  // BEH
 stringdef b4     '{U+FE92}'  // BEH
 stringdef t_1    '{U+FE93}'  // TEH_MARBUTA
 stringdef t_2    '{U+FE94}'  // TEH_MARBUTA
 stringdef t1     '{U+FE97}'  // TEH
 stringdef t2     '{U+FE98}'  // TEH
 stringdef t3     '{U+FE95}'  // TEH
 stringdef t4     '{U+FE96}'  // TEH
 stringdef th1    '{U+FE9B}'  // THEH
 stringdef th2    '{U+FE9C}'  // THEH
 stringdef th3    '{U+FE9A}'  // THEH
 stringdef th4    '{U+FE99}'  // THEH
 stringdef j1     '{U+FE9F}'  // JEEM
 stringdef j2     '{U+FEA0}'  // JEEM
 stringdef j3     '{U+FE9D}'  // JEEM
 stringdef j4     '{U+FE9E}'  // JEEM
 stringdef h1     '{U+FEA3}'  // HAH
 stringdef h2     '{U+FEA4}'  // HAH
 stringdef h3     '{U+FEA1}'  // HAH
 stringdef h4     '{U+FEA2}'  // HAH
 stringdef x1     '{U+FEA7}'  // KHAH
 stringdef x2     '{U+FEA8}'  // KHAH
 stringdef x3     '{U+FEA5}'  // KHAH
 stringdef x4     '{U+FEA6}'  // KHAH
 stringdef d1     '{U+FEA9}'  // DAL
 stringdef d2     '{U+FEAA}'  // DAL
 stringdef dz1    '{U+FEAB}'  // THAL
 stringdef dz2    '{U+FEAC}'  // THAL
 stringdef r1     '{U+FEAD}'  // REH
 stringdef r2     '{U+FEAE}'  // REH
 stringdef z1     '{U+FEAF}'  // ZAIN
 stringdef z2     '{U+FEB0}'  // ZAIN
 stringdef s1     '{U+FEB3}'  // SEEN
 stringdef s2     '{U+FEB4}'  // SEEN
 stringdef s3     '{U+FEB1}'  // SEEN
 stringdef s4     '{U+FEB2}'  // SEEN
 stringdef sh1    '{U+FEB7}'  // SHEEN
 stringdef sh2    '{U+FEB8}'  // SHEEN
 stringdef sh3    '{U+FEB5}'  // SHEEN
 stringdef sh4    '{U+FEB6}'  // SHEEN
 stringdef c1     '{U+FEBB}'  // SAD
 stringdef c2     '{U+FEBC}'  // SAD
 stringdef c3     '{U+FEB9}'  // SAD
 stringdef c4     '{U+FEBA}'  // SAD
 stringdef dh1    '{U+FEBF}'  // DAD
 stringdef dh2    '{U+FEC0}'  // DAD
 stringdef dh3    '{U+FEBD}'  // DAD
 stringdef dh4    '{U+FEBE}'  // DAD
 stringdef tt1    '{U+FEC3}'  // TAH
 stringdef tt2    '{U+FEC4}'  // TAH
 stringdef tt3    '{U+FEC1}'  // TAH
 stringdef tt4    '{U+FEC2}'  // TAH
 stringdef zh1    '{U+FEC7}'  // ZAH
 stringdef zh2    '{U+FEC8}'  // ZAH
 stringdef zh3    '{U+FEC5}'  // ZAH
 stringdef zh4    '{U+FEC6}'  // ZAH
 stringdef i1     '{U+FECB}'  // AIN
 stringdef i2     '{U+FECC}'  // AIN
 stringdef i3     '{U+FEC9}'  // AIN
 stringdef i4     '{U+FECA}'  // AIN
 stringdef gh1    '{U+FECF}'  // GHAIN
 stringdef gh2    '{U+FED0}'  // GHAIN
 stringdef gh3    '{U+FECD}'  // GHAIN
 stringdef gh4    '{U+FECE}'  // GHAIN
 stringdef f1     '{U+FED3}'  // FEH
 stringdef f2     '{U+FED4}'  // FEH
 stringdef f3     '{U+FED1}'  // FEH
 stringdef f4     '{U+FED2}'  // FEH
 stringdef q1     '{U+FED7}'  // QAF
 stringdef q2     '{U+FED8}'  // QAF
 stringdef q3     '{U+FED5}'  // QAF
 stringdef q4     '{U+FED6}'  // QAF
 stringdef k1     '{U+FEDB}'  // KAF
 stringdef k2     '{U+FEDC}'  // KAF
 stringdef k3     '{U+FED9}'  // KAF
 stringdef k4     '{U+FEDA}'  // KAF
 stringdef l1     '{U+FEDF}'  // LAM
 stringdef l2     '{U+FEE0}'  // LAM
 stringdef l3     '{U+FEDD}'  // LAM
 stringdef l4     '{U+FEDE}'  // LAM
 stringdef m1     '{U+FEE3}'  // MEEM
 stringdef m2     '{U+FEE4}'  // MEEM
 stringdef m3     '{U+FEE1}'  // MEEM
 stringdef m4     '{U+FEE2}'  // MEEM
 stringdef n1     '{U+FEE7}'  // NOON
 stringdef n2     '{U+FEE8}'  // NOON
 stringdef n3     '{U+FEE5}'  // NOON
 stringdef n4     '{U+FEE6}'  // NOON
 stringdef e1     '{U+FEEB}'  // HEH
 stringdef e2     '{U+FEEC}'  // HEH
 stringdef e3     '{U+FEE9}'  // HEH
 stringdef e4     '{U+FEEA}'  // HEH
 stringdef w1     '{U+FEED}'  // WAW
 stringdef w2     '{U+FEEE}'  // WAW
 stringdef a_1    '{U+FEEF}'  // ALEF_MAKSURA
 stringdef a_2    '{U+FEF0}'  // ALEF_MAKSURA
 stringdef y1     '{U+FEF3}'  // YEH
 stringdef y2     '{U+FEF4}'  // YEH
 stringdef y3     '{U+FEF1}'  // YEH
 stringdef y4     '{U+FEF2}'  // YEH

 // Ligatures Lam-Alef
 stringdef la      '{U+FEFB}' // LAM_ALEF
 stringdef la2     '{U+FEFC}' // LAM_ALEF
 stringdef lao     '{U+FEF7}' // LAM_ALEF_HAMZA_ABOVE
 stringdef lao2    '{U+FEF8}' // LAM_ALEF_HAMZA_ABOVE
 stringdef lao_    '{U+FEF9}' // LAM_ALEF_HAMZA_BELOW
 stringdef lao_2   '{U+FEFA}' // LAM_ALEF_HAMZA_BELOW
 stringdef la~     '{U+FEF5}' // LAM_ALEF_MADDA_ABOVE
 stringdef la~2    '{U+FEF6}' // LAM_ALEF_MADDA_ABOVE


 booleans (
            is_noun
            is_verb
            is_defined
         )

 routines (
    Prefix_Step1
    Prefix_Step2
    Prefix_Step3a_Noun
    Prefix_Step3b_Noun
    Prefix_Step3_Verb
    Prefix_Step4_Verb

    Suffix_All_alef_maqsura
    Suffix_Noun_Step1a
    Suffix_Noun_Step1b
    Suffix_Noun_Step2a
    Suffix_Noun_Step2b
    Suffix_Noun_Step2c1
    Suffix_Noun_Step2c2
    Suffix_Noun_Step3
    Suffix_Verb_Step1
    Suffix_Verb_Step2a
    Suffix_Verb_Step2b
    Suffix_Verb_Step2c

    Normalize_post
    Normalize_pre

    Checks1
 )

 externals ( stem )

 groupings (  )


 // Normalizations
 // Normalize_pre: scan the word from left to right and, at each position,
 // either rewrite the longest matching entry of the table below or step over
 // one character (`next`).  Matching entries are handled as follows:
 //   - vocalization marks and the kasheeda are deleted;
 //   - the digits U+0660..U+0669 are replaced by the ASCII digits 0..9;
 //   - shaped (presentation) forms are replaced by the plain letter;
 //   - Lam-Alef ligatures are expanded to the two letters they stand for.
 // The loop is wrapped in `do`, so the routine as a whole does not fail.
 define Normalize_pre as (
    do repeat (
        (
            [substring] among (
                '{aan}' '{uun}' '{iin}' '{aa}' '{uu}' '{ii}' '{oo}' '{~}'( delete ) // strip vocalization
                '{_}' ( delete ) // strip kasheeda

                // Hindu–Arabic numerals
                '{0}' ( <- '0')
                '{1}' ( <- '1')
                '{2}' ( <- '2')
                '{3}' ( <- '3')
                '{4}' ( <- '4')
                '{5}' ( <- '5')
                '{6}' ( <- '6')
                '{7}' ( <- '7')
                '{8}' ( <- '8')
                '{9}' ( <- '9')

                // Shaped forms
                '{o1}' ( <- '{o}' ) // HAMZA
                '{ao1}' '{ao2}'  ( <- '{ao}' ) // ALEF_HAMZA_ABOVE
                '{ao_1}' '{ao_2}' ( <- '{ao_}' ) // ALEF_HAMZA_BELOW
                '{yo1}'  '{yo2}' '{yo3}'  '{yo4}'  ( <- '{yo}' ) // YEH_HAMZA
                '{a~1}'  '{a~2}'( <- '{a~}' ) // ALEF_MADDA
                '{wo1}' '{wo2}'( <- '{wo}' ) // WAW_HAMZA
                '{a1}' '{a2}' ( <- '{a}' ) // ALEF
                '{b1}' '{b2}' '{b3}'  '{b4}'  ( <- '{b}' ) // BEH
                '{t_1}'  '{t_2}' ( <- '{t_}' ) // TEH_MARBUTA
                '{t1}'   '{t2}' '{t3}' '{t4}'  ( <- '{t}' ) // TEH
                '{th1}' '{th2}' '{th3}' '{th4}' ( <- '{th}' ) // THEH
                '{j1}' '{j2}'  '{j3}' '{j4}'(  <- '{j}' ) // JEEM
                '{h1}' '{h2}' '{h3}' '{h4}' ( <- '{h}' ) // HAH
                '{x1}' '{x2}' '{x3}' '{x4}'( <- '{x}' ) // KHAH
                '{d1}'  '{d2}'  ( <- '{d}' ) // DAL
                '{dz1}''{dz2}' ( <- '{dz}' ) // THAL
                '{r1}' '{r2}'( <- '{r}' ) // REH
                '{z1}' '{z2}'  ( <- '{z}' ) // ZAIN
                '{s1}'  '{s2}'   '{s3}' '{s4}'( <- '{s}' ) // SEEN
                '{sh1}' '{sh2}' '{sh3}' '{sh4}' ( <- '{sh}' ) // SHEEN
                '{c1}' '{c2}' '{c3}' '{c4}'( <- '{c}' ) // SAD
                '{dh1}'    '{dh2}'   '{dh3}'  '{dh4}'( <- '{dh}' ) // DAD
                '{tt1}'  '{tt2}'  '{tt3}'  '{tt4}' ( <- '{tt}' ) // TAH
                '{zh1}' '{zh2}' '{zh3}'    '{zh4}'( <- '{zh}' ) // ZAH
                '{i1}' '{i2}' '{i3}'  '{i4}'( <- '{i}' ) // AIN
                '{gh1}' '{gh2}' '{gh3}'  '{gh4}'( <- '{gh}' ) // GHAIN
                '{f1}'  '{f2}' '{f3}'  '{f4}' ( <- '{f}' ) // FEH
                '{q1}' '{q2}' '{q3}' '{q4}' ( <- '{q}' ) // QAF
                '{k1}' '{k2}' '{k3}'  '{k4}'( <- '{k}' ) // KAF
                '{l1}' '{l2}' '{l3}' '{l4}'( <- '{l}' ) // LAM
                '{m1}' '{m2}'  '{m3}' '{m4}'   ( <- '{m}' ) // MEEM
                '{n1}'  '{n2}' '{n3}'  '{n4}'( <- '{n}' ) // NOON
                '{e1}' '{e2}' '{e3}' '{e4}' ( <- '{e}' ) // HEH
                '{w1}'  '{w2}'  ( <- '{w}' ) // WAW
                '{a_1}' '{a_2}' ( <- '{a_}' ) // ALEF_MAKSURA
                '{y1}' '{y2}' '{y3}' '{y4}' ( <- '{y}' ) // YEH

                // Ligatures Lam-Alef
                '{la}'  '{la2}'     (<- '{l}{a}')
                '{lao}'  '{lao2}'   (<- '{l}{ao}')
                '{lao_}'  '{lao_2}' (<- '{l}{ao_}')
                '{la~}'  '{la~2}'    (<- '{l}{a~}')

            )
        )
        or
        next
    )
 )

 // Normalize_post: hamza normalization applied after stemming.
 //   1. Working backwards from the end of the word, a trailing hamza carrier
 //      (alef with hamza above/below, alef madda, waw or yeh with hamza) is
 //      replaced by the bare hamza.
 //   2. Scanning forwards over the whole word, every remaining carrier is
 //      replaced by its plain letter: alef, waw or yeh.
 // Both passes are wrapped in `do`, so neither can make the routine fail.
 define Normalize_post as (

    do (
        // normalize last hamza
        backwards (
        [substring] among (
            '{ao}''{ao_}' '{a~}' ( <- '{o}')
        '{wo}' ( <- '{o}')
        '{yo}' ( <- '{o}')
        )
        )
    )

    do repeat (
        (
        // normalize the remaining hamzas
            [substring] among (
                '{ao}''{ao_}' '{a~}' ( <- '{a}')
                '{wo}' ( <- '{w}')
                '{yo}' ( <- '{y}')
            )
        )
        or
        next
    )
 )

 // Checks
 // Checks1: look at the start of the word for one of the prefixes
 // beh-alef-lam, kaf-alef-lam, lam-lam or alef-lam.  When one matches and
 // the accompanying length test passes, the word is flagged as a defined
 // noun: is_noun and is_defined are set and is_verb is cleared.  Nothing is
 // deleted here; the prefix is only used to classify the word.
 define Checks1 as (
    [substring] among (
        '{b}{a}{l}' '{k}{a}{l}' ($(len > 4)  set is_noun  unset is_verb set is_defined)
        '{l}{l}' '{a}{l}' ($(len > 3)  set is_noun unset is_verb set is_defined)
    )
 )


 //prefixes
 // Prefix_Step1: when the word starts with alef-hamza-above followed by
 // another alef/hamza form, collapse the pair into a single letter (the
 // replacement is given per entry).  Every entry requires len > 3.
 define Prefix_Step1 as (
         [substring] among (
             '{ao}{ao}' ($(len > 3) <-  '{ao}'  )
             '{ao}{a~}' ($(len > 3) <-  '{a~}'  )
             '{ao}{wo}' ($(len > 3) <-  '{ao}'  )
             '{ao}{a}' ($(len > 3) <-  '{a}'  )
             '{ao}{ao_}' ($(len > 3) <-  '{ao_}'  )
            // '{ao}' ($(len > 3) delete) //rare case
        )
 )

 // Prefix_Step2: delete a leading feh or waw when len > 3.  The two `not`
 // tests make the routine fail first (leaving the word untouched) if the
 // word starts with feh-alef or waw-alef.
 define Prefix_Step2 as (
        not '{f}{a}'
        not '{w}{a}'
        [substring] among (
            '{f}' ($(len > 3) delete)
            '{w}' ($(len > 3) delete)
        )
 )

 // Prefix_Step3a_Noun: delete one of the prefixes that Checks1 uses to
 // recognise a defined noun.  The three-letter prefixes need len > 5, the
 // two-letter ones len > 4.
 define Prefix_Step3a_Noun as ( // it is noun and defined
        [substring] among (
            '{b}{a}{l}' '{k}{a}{l}' ($(len > 5) delete)
            '{l}{l}' '{a}{l}' ($(len > 4) delete)
        )
 )

 // Prefix_Step3b_Noun: for words not handled by Prefix_Step3a_Noun, delete a
 // leading beh, or reduce a doubled leading beh or kaf to a single letter.
 // All entries require len > 3; the routine fails up front if the word
 // starts with beh-alef.
 define Prefix_Step3b_Noun as ( // probably a noun and defined
        not '{b}{a}' // exception
        [substring] among (
            '{b}' ($(len > 3) delete)
            // '{k}'  '{l}' ($(len > 3) delete) // BUG: causes confusion
            '{b}{b}' ($(len > 3) <-  '{b}'  )
            '{k}{k}'  ($(len > 3) <-  '{k}'  )
           )

 )

 // Prefix_Step3_Verb: when the word starts with seen followed by yeh, teh,
 // noon or alef-hamza-above, drop the seen and keep the second letter.
 // Every entry requires len > 4.
 define Prefix_Step3_Verb as (
        [substring] among (
            //'{s}' ($(len > 4) delete)// BUG: causes confusion
            '{s}{y}' ($(len > 4) <- '{y}' )
            '{s}{t}' ($(len > 4) <- '{t}')
            '{s}{n}' ($(len > 4) <- '{n}')
            '{s}{ao}' ($(len > 4) <- '{ao}')
        )
 )

 // Prefix_Step4_Verb: when the word starts with yeh/noon/teh + seen + teh and
 // len > 4, mark it as a verb (set is_verb, unset is_noun) and replace the
 // three-letter prefix with alef-seen-teh.
 define Prefix_Step4_Verb as (
        [substring] among (
            '{y}{s}{t}' '{n}{s}{t}' '{t}{s}{t}' ($(len > 4) set is_verb unset is_noun <-  '{a}{s}{t}' )
        )
 )

 // suffixes
 // Suffix routines.  Everything in this block is compiled in backward mode,
 // so each `[substring] among` matches at the end of the word.  Each entry
 // carries its own length test; the routine fails when no entry matches or
 // the length test of the matched entry fails.
 backwardmode (

        // Noun step 1a: delete a one-, two- or three-letter ending; the
        // minimum length grows with the ending (>= 4, >= 5, >= 6).
        define Suffix_Noun_Step1a as (
                [substring] among (
                        '{y}' '{k}' '{e}' ($(len >= 4) delete)
                        '{n}{a}' '{k}{m}' '{e}{a}' '{e}{n}' '{e}{m}' ($(len >= 5)  delete)
                        '{k}{m}{a}' '{e}{m}{a}' ($(len >= 6) delete)
                )
            )
        // Noun step 1b: delete a final noon when len > 5.
        define Suffix_Noun_Step1b as (
            [substring] among (
                '{n}' ($(len > 5) delete)
            )
        )

        // Noun step 2a: delete a final alef, yeh or waw when len > 4.
        define Suffix_Noun_Step2a as (
                [substring] among (
                        '{a}' '{y}' '{w}' ($(len > 4) delete)
                )
            )

        // Noun step 2b: delete a final alef-teh when len >= 5.
        define Suffix_Noun_Step2b as (
            [substring] among (
                '{a}{t}' ($(len >= 5) delete)
            )
        )

        // Noun step 2c1: delete a final teh when len >= 4.
        define Suffix_Noun_Step2c1 as (
            [substring] among (
                '{t}' ($(len >= 4) delete)
            )
        )
        // Noun step 2c2: delete a final teh marbuta when len >= 4.
        define Suffix_Noun_Step2c2 as ( // feminine t_
            [substring] among (
                '{t_}' ($(len >= 4) delete)
            )
        )
        // Noun step 3: delete a final yeh when len >= 3.
        define Suffix_Noun_Step3 as ( // ya' nisbiya
            [substring] among (
                '{y}' ($(len >= 3) delete)
            )
        )

        // Verb step 1: delete a one-, two- or three-letter ending; the
        // minimum length grows with the ending (>= 4, >= 5, >= 6).
        define Suffix_Verb_Step1 as (
                [substring] among (
                        '{e}' '{k}' ($(len >= 4) delete)
                        '{n}{y}' '{n}{a}' '{e}{a}' '{e}{m}' '{e}{n}' '{k}{m}' '{k}{n}' ($(len >= 5) delete)
                        '{e}{m}{a}' '{k}{m}{a}' '{k}{m}{w}'($(len >= 6) delete)
                )
            )
        // Verb step 2a: delete one of the endings listed below, each with
        // its own length requirement.
        define Suffix_Verb_Step2a as (
                [substring] among (
                       '{t}' ($(len >= 4)  delete)
                        '{a}' '{n}' '{y}' ($(len >= 4) delete)
                        '{n}{a}' '{t}{a}'  '{t}{n}'  ($(len >= 5) delete)// past
                        '{a}{n}' '{w}{n}' '{y}{n}' ($(len > 5) delete) // present
                        '{t}{m}{a}' ($(len >= 6) delete)
                )
            )

        // Verb step 2b: delete a final waw-alef or teh-meem when len >= 5.
        define Suffix_Verb_Step2b as (
            [substring] among (
                '{w}{a}' '{t}{m}' ($(len >= 5) delete)
            )
        )


        // Verb step 2c: delete a final waw (len >= 4) or teh-meem-waw
        // (len >= 6).
        define Suffix_Verb_Step2c as (
            [substring] among (
                '{w}' ($(len >= 4) delete)
                '{t}{m}{w}' ($(len >= 6) delete)
            )
        )

        // Replace a final alef maksura by yeh (no length test).
        define Suffix_All_alef_maqsura as (
            [substring] among (
                '{a_}' ( <- '{y}' ) // spelling error
                // '{a_}' ( delete ) // if noun > 3
                // '{a_}' ( <- '{a}') // if verb
            )
        )
 )

 // stem: entry point of the Arabic stemmer.  Stages, in order:
 //   1. reset the flags (word may be a noun or a verb, not yet "defined");
 //   2. Checks1 classifies the word from its prefix;
 //   3. Normalize_pre cleans up the input;
 //   4. one suffix branch runs in backward mode: the verb branch if is_verb
 //      holds, otherwise the noun branch if is_noun holds, otherwise the
 //      alef-maqsura rule;
 //   5. prefix removal runs in forward mode;
 //   6. Normalize_post normalizes hamza forms.
 // Each stage is wrapped in `do`, so a failing stage leaves the word as it
 // was and the following stages still run.
 // NOTE(review): Checks1 runs before Normalize_pre, so it inspects the
 // un-normalized input (diacritics and shaped forms are still present at
 // that point) - confirm that this ordering is intended.
 define stem as (
    // set initial values
    set is_noun
    set is_verb
    unset is_defined

    // guess type and properties
    do Checks1

    // normalization pre-stemming
    do Normalize_pre


    backwards (

       do (
              //Suffixes for verbs
            (
           is_verb
           (
               (
                  (atleast 1 Suffix_Verb_Step1)
                  ( Suffix_Verb_Step2a or Suffix_Verb_Step2c  or next)
                )
                or Suffix_Verb_Step2b
                or Suffix_Verb_Step2a
            )
           )
            //Suffixes for nouns
          or (
               is_noun
                (

                 try (
                     Suffix_Noun_Step2c2
                     or (not is_defined Suffix_Noun_Step1a (
                            Suffix_Noun_Step2a
                            or Suffix_Noun_Step2b
                            or Suffix_Noun_Step2c1
                            or next))
                     or (Suffix_Noun_Step1b (
                            Suffix_Noun_Step2a
                            or Suffix_Noun_Step2b
                            or Suffix_Noun_Step2c1))
                     or (not is_defined Suffix_Noun_Step2a)
                     or (Suffix_Noun_Step2b)
                 )
                 Suffix_Noun_Step3
                 )

            )

            // Suffixes for alef maqsura
            or  Suffix_All_alef_maqsura
        )
    )

    //Prefixes
    do (
       try Prefix_Step1
       try Prefix_Step2
       ( Prefix_Step3a_Noun
         or (is_noun Prefix_Step3b_Noun)
         or (is_verb try Prefix_Step3_Verb Prefix_Step4_Verb)
         )
    )

    // normalization post-stemming
    do Normalize_post

 )
--- a/contrib/snowball/algorithms/basque.sbl
+++ b/contrib/snowball/algorithms/basque.sbl
@@ -0,0 +1,149 @@
 routines (
           aditzak
           izenak
           adjetiboak
           mark_regions
           RV R2 R1
 )

 externals ( stem )

 integers ( pV p1 p2 )

 groupings ( v )

 stringescapes {}

 /* special characters */

 stringdef n~ '{U+00F1}'

 define v 'aeiou'

 // mark_regions: compute the marks pV, p1 and p2 used by the RV, R1 and R2
 // tests.  All three default to the end of the word (`limit`).
 //   pV - if the word starts with a vowel: after the next vowel when the
 //        second letter is a non-vowel, or after the next non-vowel when the
 //        second letter is a vowel.  If it starts with a non-vowel: after
 //        the next vowel when the second letter is a non-vowel, or after the
 //        third letter when the second letter is a vowel.
 //   p1 - after the first non-vowel that follows a vowel.
 //   p2 - the same rule applied again, starting from p1.
 // Each `do` leaves the defaults in place if its pattern cannot be matched.
 define mark_regions as (

    $pV = limit
    $p1 = limit
    $p2 = limit  // defaults

    do (
        ( v (non-v gopast v) or (v gopast non-v) )
        or
        ( non-v (non-v gopast v) or (v next) )
        setmark pV
    )
    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
 )

 backwardmode (

    // Region tests: each succeeds when its mark (set by mark_regions) is at
    // or before the current cursor position.
    define RV as $pV <= cursor
    define R2 as $p2 <= cursor
    define R1 as $p1 <= cursor

    // aditzak: match the longest ending from the table below at the end of
    // the word.
    //   - first group: deleted if the RV test passes;
    //   - 'garri' 'garria' 'tza': deleted if the R2 test passes;
    //   - 'atseden', 'arabera', 'baditu': replaced by themselves, so the
    //     text is unchanged but the routine still succeeds (presumably to
    //     protect these words from shorter endings - TODO confirm).
    // NOTE(review): the routine name appears to be Basque for "verbs".
    define aditzak as (
        [substring] among(
            'le' 'la' 'tzaile' 'aldatu' 'atu' 'tzailea' 'taile' 'tailea' 'pera' 'gale' 'galea'
            'gura' 'kura' 'kor' 'korra' 'or' 'orra' 'tun' 'tuna' 'gaitz' 'gaitza'
            'kaitz' 'kaitza' 'ezin' 'ezina' 'tezin' 'tezina' 'errez' 'erreza'
            'karri' 'karria' 'tzaga' 'tzaka' 'tzake' 'tzeke' 'ez' 'eza' 'tzez'
            'keta' 'eta' 'etan' 'pen' 'pena' 'tze' 'atze' 'kuntza' 'kunde' 'kundea'
            'kune' 'kunea' 'kuna' 'kera' 'era' 'kizun' 'kizuna' 'dura' 'tura' 'men' 'mena'
            'go' 'ago' 'tio' 'taldi' 'taldia' 'aldi' 'aldia' 'gune' 'gunea' 'bide' 'bidea'
            'pide' 'pidea' 'gai' 'gaia' 'ki' 'kin' 'rekin' 'kina' 'kari' 'karia' 'ari' 'tari' 'etari'
            'gailu' 'gailua' 'kide' 'kidea' 'ide' 'idea' 'du' 'ka' 'kan' 'an' 'ean' 'tu' 'lari' 'tatu'
            'rean' 'tarazi' 'arazi' 'tzat' 'bera' 'dako'
            ( RV delete )
            'garri' 'garria' 'tza'
            (R2 delete)
            'atseden'
            (<- 'atseden')
            'arabera'
            (<- 'arabera')
            'baditu'
            (<- 'baditu')

        )
    )

    define izenak as (
        [substring] among(
            'ari' 'aria' 'bizia' 'kari' 'karia' 'lari' 'laria' 'tari' 'taria' 'zain' 'zaina'
            'tzain' 'tzaina' 'zale' 'zalea' 'tzale' 'tzalea' 'aizun' 'orde' 'ordea'
            'burua' 'ohi' 'ohia' 'kintza' 'gintzo' 'gintzu' 'tzu' 'tzua'
            'tzo' 'tzoa' 'kuntza' 'talde' 'taldea' 'eria' 'keria' 'teria' 'di'
            'za' 'ada' 'tara' 'etara' 'tra' 'ta' 'tegi' 'tegia' 'keta' 'z' 'zko' 'zkoa'
            'ti' 'tia' 'tsu' 'tsua' 'zu' 'zua' 'bera' 'pera' 'zto' 'ztoa' 'asi' 'asia'
            'gile' 'gilea' 'estu' 'estua' 'larri' 'larria' 'nahi' 'nahia'
            'koi' 'koia' 'oi' 'oia' 'goi' 'min' 'mina' 'dun' 'duna' 'duru' 'durua'
            'duri' 'duria' 'os' 'osa' 'oso' 'osoa' 'ar' 'ara' 'tar' 'dar' 'dara'
            'tiar' 'tiara' 'liar' 'liara' 'gabe' 'gabea' 'kabe' 'kabea' 'ga' 'ge'
            'kada' 'tasun' 'tasuna' 'asun' 'asuna' 'go' 'mendu' 'mendua' 'mentu' 'mentua'
            'mendi' 'mendia' 'zio' 'zioa' 'zino' 'zinoa' 'zione' 'zionea' 'ezia'
            'degi' 'degia' 'egi' 'egia' 'toki' 'tokia' 'leku' 'lekua' 'gintza' 'alde'
            'aldea' 'kalde' 'kaldea' 'gune' 'gunea' 'une' 'unea' 'una' 'pe' 'pea'
            'gibel' 'gibela' 'ondo' 'ondoa' 'arte' 'artea' 'aurre' 'aurrea'
            'etxe' 'etxea' 'ola' 'ontzi' 'ontzia' 'gela' 'denda' 'taldi' 'taldia'
            'aldi' 'aldia' 'te' 'tea' 'zaro' 'zaroa' 'taro' 'taroa' 'oro' 'oroa'
            'aro' 'aroa' 'ero' 'eroa' 'eroz' 'eroza' 'ka' 'kan' 'kana' 'tako' 'etako' 'takoa'
            'kote' 'kotea' 'tzar' 'tzarra' 'handi' 'handia' 'kondo' 'kondoa' 'skila'
            'no' 'noa' '{n~}o' '{n~}oa' 'ska' 'xka' 'zka' 'tila' 'to' 'toa' 'tto' 'ttoa'
            'txo' 'txoa' 'txu' 'txua' 'anda' 'anga' 'urren' 'urrena' 'gai' 'gaia'
            'gei' 'geia' 'eme' 'emea' 'kume' 'kumea' 'sa' 'ko' 'eko' 'koa' 'ena'
            'enea' 'ne' 'nea' 'kor' 'korra' 'ez' 'eza' 'eta' 'etan'
            'ki' 'kia' 'kin' 'kina' 'tu' 'tua' 'du' 'dua' 'ek'
            'tarik' 'tariko' 'tan' 'ordu' 'ordua' 'oste' 'ostea' 'tzara'
            'ra' 'antza' 'behar' 'ro' 'giro' 'ak' 'zp' 'ket'
            'kail' 'kaila' 'ail' 'kirri' 'kirria' 'ngo' 'ngoa' '{n~}i' 'sko'
            'sta' 'koitz' 'koitza' 'na' 'garren' 'garrena' 'kera'
            'gerren' 'gerrena' 'garna' 'kide' 'tz' 'tuko'
            ( RV delete )
            'ora' 'garri' 'garria' 'or' 'buru' 'ren' 'tza'
            ( R2 delete )
            'joka'
            (<- 'jok')
            'tzen' 'ten' 'en' 'tatu'
            (R1 delete)
            'trako'
            (<- 'tra')
            'minutuko'
            (<- 'minutu')
            'zehar'
            (<- 'zehar')
            'geldi'
            (<- 'geldi')
            'igaro'
            (<- 'igaro')
            'aurka'
            (<- 'aurka')
        )
    )

    // adjetiboak: match the longest ending from the table below at the end
    // of the word.  Endings in the first group are deleted if the RV test
    // passes; 'zlea' is replaced by 'z'.
    // NOTE(review): the routine name appears to be Basque for "adjectives".
    define adjetiboak as (
        [substring] among(
            'era' 'ero' 'go' 'tate' 'tade' 'date' 'dade' 'keria'
            'ki' 'to' 'ro' 'la' 'gi' 'larik' 'lanik' 'ik' 'ztik' 'rik'
            ( RV delete )
            'zlea'
            (<- 'z')
        )
    )

 )

 // stem: entry point of the Basque stemmer.  Computes the region marks,
 // then, working backwards from the end of the word, applies aditzak
 // repeatedly until it fails, then izenak repeatedly until it fails, and
 // finally adjetiboak at most once.
 define stem as (
    do mark_regions
    backwards (
        repeat aditzak
        repeat izenak
        do adjetiboak
    )

 )

 /*
    Note 1: additions of 21 Jul 2010
 */
--- a/contrib/snowball/algorithms/catalan.sbl
+++ b/contrib/snowball/algorithms/catalan.sbl
@@ -0,0 +1,202 @@
 routines (
           cleaning mark_regions
           R1  R2
           attached_pronoun
           standard_suffix
           verb_suffix
           residual_suffix
 )

 externals ( stem )

 integers ( p1 p2 )

 groupings ( v )

 stringescapes {}

 /* special characters */

 stringdef a'   '{U+00E1}'  // a-acute
 stringdef a`   '{U+00E0}'  // a-grave
 stringdef c,   '{U+00E7}'  // c-cedilla
 stringdef e'   '{U+00E9}'  // e-acute
 stringdef e`   '{U+00E8}'  // e-grave
 stringdef i'   '{U+00ED}'  // i-acute
 stringdef i`   '{U+00EC}'  // i-grave
 stringdef i"   '{U+00EF}'  // i-diaeresis
 stringdef o'   '{U+00F3}'  // o-acute
 stringdef o`   '{U+00F2}'  // o-grave
 stringdef u'   '{U+00FA}'  // u-acute
 stringdef u"   '{U+00FC}'  // u-diaeresis
 stringdef .    '{U+00B7}'   // middle dot, used to write the geminated l

 define v 'aeiou{a'}{a`}{e'}{e`}{i'}{i"}{o'}{o`}{u'}{u"}'

 // mark_regions: compute the marks p1 and p2 used by the R1 and R2 tests.
 // Both default to the end of the word (`limit`).  p1 is placed after the
 // first non-vowel that follows a vowel; p2 is found by applying the same
 // rule again starting from p1.  If either search fails, the marks set so
 // far are kept and the rest stay at their default.
 define mark_regions as (

    $p1 = limit
    $p2 = limit  // defaults

    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
 )

 // cleaning: walk the word from left to right, replacing each accented
 // vowel listed below by its unaccented letter and the middle dot by '.'.
 // The empty-string entry matches everywhere else and simply advances one
 // character, so the loop ends when the end of the word is reached.
 define cleaning as repeat (
    [substring] among(
        '{a'}' (<- 'a')
        '{a`}' (<- 'a')
        '{e'}' (<- 'e')
        '{e`}' (<- 'e')
        '{i'}' (<- 'i')
        '{i`}' (<- 'i')
        '{o'}' (<- 'o')
        '{o`}' (<- 'o')
        '{u'}' (<- 'u')
        '{u"}' (<- 'u')
        '{i"}' (<- 'i')
        '{.}' (<- '.')
        ''     (next)
    )
 )

 backwardmode (

    // Region tests: each succeeds when its mark (set by mark_regions) is at
    // or before the current cursor position.
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    // attached_pronoun: match the longest entry from the table below at the
    // end of the word (forms introduced by an apostrophe or a hyphen, and
    // bare forms) and delete it if the R1 test passes.
    define attached_pronoun as (
    [substring] among (
        '{'}s'  '{'}hi' '{'}ho' '{'}l' '{'}ls'
        '-ls' '-la' '-les' '-li'
        'vos' 'se'  'nos' '-nos' '-us' 'us'
        '{'}n' '{'}ns' '-n' '-ns'
        '{'}m' '-me' '-m'
        '-te' '{'}t'
        'li' 'lo' 'los'
        'me'  'sela' 'selo' 'selas' 'selos' 'le'
        'la' 'las' 'les' 'ens' 'ho' 'hi'
        (R1 delete)
    )
    )

    define standard_suffix as (
        [substring] among(
            'ar' 'atge' 'formes' 'icte' 'ictes'
            'ell' 'ells' 'ella'  '{e'}s' '{e`}s' 'esc' 'essa' 'et' 'ets' 'eta'
            'eres' 'eries' 'ers' 'ina' 'ines' 'able' 'ls'
            'i{o'}' 'itat' 'itats' 'itzar' 'iva' 'ives' 'ivisme' 'ius'
            'fer' 'ment' 'amen' 'ament' 'aments' 'ments' 'ot' 'sfera' 'al' 'als' 'era' 'ana' 'iste'
            'aire' 'eria' 'esa' 'eses' 'esos' 'or' '{i'}cia' '{i'}cies' 'icis' 'ici' '{i'}ci' '{i'}cis'
            '{a`}ria' '{a`}ries' 'alla' 'ci{o'}' 'cions' 'n{c,}a' 'nces' '{o'}' 'dor' 'all'
            'il' '{i'}stic' 'enc' 'enca' '{i'}s' 'issa' 'issos' '{i'}ssem' '{i'}ssiu' 'issem' 'isseu' '{i'}sseu'
            '{o'}s' 'osa'  'dora' 'dores' 'dors' 'adura' 'ble' 'bles' '{i'}vol' '{i'}vola' 'd{i'}s' 'egar' 'ejar' 'ificar'
            'itar' 'ables' 'adors' 'idores' 'idors'
            'adora' 'aci{o'}' 'doras' 'dur' 'dures' 'alleng{u"}es'
            'ant' 'ants' 'ancia' 'ancies' 'at{o`}ria' 'at{o`}ries' 'tori' 'toris'
            'ats' 'ions'  'ota' 'isam' 'ors' 'ora' 'ores' 'isament'
            'bilitat' 'bilitats' 'ivitat' 'ivitats' 'ari' 'aris' 'ionisme' 'ionista' 'ionistes'
            'ialista' 'ialistes' 'ialisme' 'ialismes' 'ud' 'uts' 'uds' 'encia' 'encies' '{e`}ncia' '{e`}ncies'
            '{i"}tat' '{i"}tats' 'atiu' 'atius' 'atives' 'ativa' 'ativitat' 'ativitats' 'ible' 'ibles'
            'assa' 'asses' 'assos'
             'ent' 'ents'
             '{i'}ssim' '{i'}ssima' '{i'}ssims' '{i'}ssimes' '{i`}ssem' '{i`}sseu' '{i`}ssin'
             'ims' 'ima' 'imes'
             'isme' 'ista' 'ismes' 'istes'
             'inia' 'inies' '{i'}inia' '{i'}nies' 'ita' 'ites' 'triu' 'trius'
             'oses' 'osos' 'ient' 'otes' 'ots'
            (R1 delete)
            'acions' 'ada' 'ades'
            (R2 delete)
            'log{i'}a' 'log{i'}es''logia' 'logies' 'logi' 'logis' 'l{o'}gica' 'l{o'}gics' 'l{o'}giques'
            (R2 <- 'log')
            'ic' 'ica' 'ics' 'iques'
            (R2 <- 'ic')
            'qu{i'}ssim' 'qu{i'}ssims' 'qu{i'}ssimes' 'qu{i'}ssima'
            (R1 <- 'c')
        )
    )

    define verb_suffix as (
        [substring] among(
            'ador' 'adora'  'adors' 'adores' 're' 'ie'
             'ent' 'ents' 'udes' 'ar{a`}' 'eren'
            'ar{a'}' 'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
            'aria' 'arian' 'arien' 'aries' 'ar{a`}s'
            'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ara'
            'ar{e'}' 'ar{e'}s'
            'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
            'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
            'er{e'}' 'er' 'erau' 'erass'
            'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
            'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
            'ir{e'}' '{i'}rem' '{i'}reu' '{i'}eu'
            'ia' 'ies' '{i'}em' '{i`}eu' 'ien'
            'at' 'ut' 'uda' 'ava' 'aves' 'avem' '{a'}vem' '{a`}vem' '{a`}veu' '{a'}veu' 'aven' 'au' 'ats'
            'asseu' 'esseu' 'eresseu' '{a`}sseu' '{a`}ssem' '{a`}ssim' '{a`}ssiu'
            'essen' 'esses' 'assen' 'asses' 'assim' 'assiu'
            '{e'}ssen' '{e'}sseu'  '{e'}ssim' '{e'}ssiu' '{e'}ssem'
            '{i'}' 'ares' '{a`}rem' '{a`}reu' '{a`}ren'
            'ar{i'}em' 'ar{i'}eu'
            'areu' 'aren' 'ant' '{i"}m' '{i"}u'
            '{e'}s' '{i"}en' 'en' 'es' 'em' 'am' 'ams' '{i"}a' '{i"}es'
            'dre' 'eix' 'eixer' 'tzar' 'eixes' 'ides' '{i"}des' 'it' '{i"}t' '{i"}da'
            'aba' 'ada' 'ades' 'ida' '{i'}a' 'iera' 'ad' 'ed' 'its'
            'id' 'ids'  'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
            'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
            'ido' 'iendo' 'i{o'}' 'ar' 'ir' 'as'
            'ieu' 'ii' 'io' 'i{a`}'
            'ess' 'essin' 'essis'  'ass' 'assin' 'assis' 'essim' '{e`}ssim' '{e`}ssiu'
            'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
            'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
            'ierais'  'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
            'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos' 'ques'
            '{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
            'ira' 'iran' 'irem' 'iren' 'ires' 'ireu' 'iria' 'irien'
            'iries' 'ir{a`}' 'ir{a`}s' 'ir{e`}' 'ir{i`}em' 'ir{i`}eu'
            'isquen' 'iguem' 'igueu' 'esqui' 'esquin' 'esquis' 'eixi' 'eixin' 'eixis'
            'eixen' 'eixo' 'isin' 'isis'  'esques' 'sis' 'sin'
            'int' 'ir{i'}em' 'ir{i'}eu' 'isc' 'atges' 'esca' 'esquen'
            'issen' 'isses' 'issin' 'issis' 'isca' 'issiu' 'issim'
            '{i"}sc' '{i"}sca' '{i"}ssin' '{i'}ssiu' '{i'}ssim' '{i"}ssis' '{i"}guem' '{i"}gueu'
            '{i"}ra' '{i"}ren' '{i"}res'
            '{i"}squen' '{i"}sques' '{i"}ssen' '{i"}sses' '{i"}xo' '{i"}xen' '{i"}xes' '{i"}x'
            'ixo' 'ixen' 'ixes' 'ix' 'ixa' 'inin' 'inis' 'ini' 'ineu' 'itza' 'itzi' 'itzeu' 'itzis'
            'itzo' 'itz' 'itz{a`}' 'arem' 'in' '{a`}s' 'i{i"}' 'i{i"}n' 'i{i"}s'
                (R1 delete)
            'ando'
                (R2 delete)
        )
    )

    // residual_suffix: match the longest ending from the table below at the
    // end of the word.  Endings in the first group are deleted if the R1
    // test passes; 'iqu' is replaced by 'ic' if the R1 test passes.
    define residual_suffix as (
        [substring] among(
            'os' 'a' 'o' '{a'}' '{a`}' '{i'}' '{o'}' 'e' '{e'}' 'eu' 'iu'
            'is' 'i' 'ir'  's' '{i`}' 'itz' '{i"}' '{i"}n' '{i"}s' 'it'
            (R1 delete)
            'iqu'
            (R1 <- 'ic')
        )
    )
 )

 // stem: entry point of the Catalan stemmer.  Computes the region marks,
 // then, working backwards from the end of the word, tries in order:
 // attached_pronoun; standard_suffix, falling back to verb_suffix if that
 // fails; and residual_suffix.  Finally cleaning runs forwards over the
 // result.  Every step is wrapped in `do`, so a failing step is skipped.
 define stem as (
    do mark_regions
    backwards (
    do attached_pronoun
    do ( standard_suffix or
             verb_suffix
           )
        do residual_suffix
    )
    do cleaning
 )

 /*
     First works 2010/07/19
     First Grammatical Reviews: https://ca.wikipedia.org/wiki/Gram%C3%A0tica_del_catal%C3%A0
     Suffix list: https://ca.wikipedia.org/wiki/Llista_de_sufixos
     Irregular Verbs: https://ca.wikipedia.org/wiki/Flexi%C3%B3_verbal_del_catal%C3%A0
 */
--- a/contrib/snowball/algorithms/danish/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/danish/stem_ISO_8859_1.sbl
@@ -12,15 +12,17 @@ strings ( ch )

 integers ( p1 x )

 groupings ( v s_ending )
 groupings ( c v s_ending )

 stringescapes {}

 /* special characters (in ISO Latin I) */
 /* special characters */

 stringdef ae   hex 'E6'
 stringdef ao   hex 'E5'
 stringdef o/   hex 'F8'
 stringdef ae   '{U+00E6}'
 stringdef ao   '{U+00E5}'
 stringdef o/   '{U+00F8}'

 define c 'bcdfghjklmnpqrstvwxz'

 define v 'aeiouy{ae}{ao}{o/}'

@@ -73,7 +75,7 @@ backwardmode (
        )
    )
    define undouble as (
        setlimit tomark p1 for ([non-v] ->ch)
        setlimit tomark p1 for ([c] ->ch)
        ch
        delete
    )
--- a/contrib/snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl
@@ -1,91 +0,0 @@
 routines (
           mark_regions
           main_suffix
           consonant_pair
           other_suffix
           undouble
 )

 externals ( stem )

 strings ( ch )

 integers ( p1 x )

 groupings ( v s_ending )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef ae   hex '91'
 stringdef ao   hex '86'
 stringdef o/   hex '9B'

 define v 'aeiouy{ae}{ao}{o/}'

 define s_ending  'abcdfghjklmnoprtvyz{ao}'

 // mark_regions: computes p1, the mark that the backwardmode routines pass to
 // `setlimit tomark p1` to bound suffix matching.
 define mark_regions as (

    // default: p1 at the end of the word
    $p1 = limit

    // x marks the position 3 characters in; `test` restores the cursor.
    // If hop 3 fails, the whole routine fails and p1 keeps its default.
    test ( hop 3 setmark x )
    // move to the first vowel, then past the next non-vowel, and mark p1
    goto v gopast non-v  setmark p1
    // p1 is raised to x when it would otherwise be smaller
    try ( $p1 < x  $p1 = x )
 )

 // Suffix routines. Everything in this section runs from the end of the word
 // towards its start; matching is bounded by the mark p1 via `setlimit`.
 backwardmode (

    // main_suffix: the longest listed ending found within the p1 limit is
    // deleted. A bare 's' is deleted only when the character before it
    // belongs to the s_ending grouping.
    define main_suffix as (
        setlimit tomark p1 for ([substring])
        among(

            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
            'erets' 'et' 'eret'
                (delete)
            's'
                (s_ending delete)
        )
    )

    // consonant_pair: when the word ends (within the p1 limit) in one of the
    // listed pairs, the final letter is removed. The `test` restores the
    // cursor after the match; `next] delete` then brackets and deletes just
    // one character.
    define consonant_pair as (
        test (
            setlimit tomark p1 for ([substring])
            among(
                'gd' // significant in the call from other_suffix
                'dt' 'gt' 'kt'
            )
        )
        next] delete
    )

    // other_suffix: first drops a final 'st' that is preceded by 'ig'
    // (optional step, wrapped in `do`); then, within the p1 limit, deletes
    // 'ig' 'lig' 'elig' 'els' and retries consonant_pair, or rewrites
    // 'l{o/}st' to 'l{o/}s'.
    define other_suffix as (
        do ( ['st'] 'ig' delete )
        setlimit tomark p1 for ([substring])
        among(
            'ig' 'lig' 'elig' 'els'
                (delete do consonant_pair)
            'l{o/}st'
                (<-'l{o/}s')
        )
    )
    // undouble: copies a final non-vowel (within the p1 limit) into ch, then
    // requires the same string to occur again immediately before it; when it
    // does, the bracketed final character is deleted.
    define undouble as (
        setlimit tomark p1 for ([non-v] ->ch)
        ch
        delete
    )
 )

 // stem: external entry point. Sets up p1 in a forward pass, then runs the
 // four suffix routines in order from the end of the word. Each call is
 // wrapped in `do`, so a routine that fails does not stop the ones after it.
 define stem as (

    do mark_regions
    backwards (
        do main_suffix
        do consonant_pair
        do other_suffix
        do undouble
    )
 )
--- a/contrib/snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
@@ -18,21 +18,21 @@ groupings ( v v_I v_j )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */
 /* special characters */

 stringdef a"   hex '84'
 stringdef e"   hex '89'
 stringdef i"   hex '8B'
 stringdef o"   hex '94'
 stringdef u"   hex '81'
 stringdef a"   '{U+00E4}'
 stringdef e"   '{U+00EB}'
 stringdef i"   '{U+00EF}'
 stringdef o"   '{U+00F6}'
 stringdef u"   '{U+00FC}'

 stringdef a'   hex 'A0'
 stringdef e'   hex '82'
 stringdef i'   hex 'A1'
 stringdef o'   hex 'A2'
 stringdef u'   hex 'A3'
 stringdef a'   '{U+00E1}'
 stringdef e'   '{U+00E9}'
 stringdef i'   '{U+00ED}'
 stringdef o'   '{U+00F3}'
 stringdef u'   '{U+00FA}'

 stringdef e`   hex '8A'
 stringdef e`   '{U+00E8}'

 define v       'aeiouy{e`}'
 define v_I     v + 'I'
--- a/contrib/snowball/algorithms/dutch/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/dutch/stem_ISO_8859_1.sbl
@@ -1,164 +0,0 @@
 routines (
           prelude postlude
           e_ending
           en_ending
           mark_regions
           R1 R2
           undouble
           standard_suffix
 )

 externals ( stem )

 booleans ( e_found )

 integers ( p1 p2 )

 groupings ( v v_I v_j )

 stringescapes {}

 /* special characters (in ISO Latin I) */

 stringdef a"   hex 'E4'
 stringdef e"   hex 'EB'
 stringdef i"   hex 'EF'
 stringdef o"   hex 'F6'
 stringdef u"   hex 'FC'

 stringdef a'   hex 'E1'
 stringdef e'   hex 'E9'
 stringdef i'   hex 'ED'
 stringdef o'   hex 'F3'
 stringdef u'   hex 'FA'

 stringdef e`   hex 'E8'

 define v       'aeiouy{e`}'
 define v_I     v + 'I'
 define v_j     v + 'j'

 // prelude: normalises the word before the regions are marked.
 define prelude as (
    // Pass 1: replace each vowel written with {"} or {'} by the plain vowel;
    // any other character is skipped by the empty-string entry. The `test`
    // puts the cursor back at the start afterwards.
    test repeat (
        [substring] among(
            '{a"}' '{a'}'
                (<- 'a')
            '{e"}' '{e'}'
                (<- 'e')
            '{i"}' '{i'}'
                (<- 'i')
            '{o"}' '{o'}'
                (<- 'o')
            '{u"}' '{u'}'
                (<- 'u')
            ''  (next)
        ) //or next
    )
    // a 'y' at the cursor position (the start of the word) becomes 'Y'
    try(['y'] <- 'Y')
    // Pass 2: after a vowel, 'i' followed by another vowel becomes 'I',
    // and 'y' becomes 'Y'.
    repeat goto (
        v [('i'] v <- 'I') or
           ('y']   <- 'Y')
    )
 )

 // mark_regions: computes the marks p1 and p2 that the R1 and R2 tests
 // compare against the cursor.
 define mark_regions as (

    // defaults: both marks at the end of the word
    $p1 = limit
    $p2 = limit

    // p1: just after the first non-vowel that follows a vowel
    gopast v  gopast non-v  setmark p1
    try($p1 < 3  $p1 = 3)  // at least 3
    // p2: the same rule applied again, continuing from the current cursor
    gopast v  gopast non-v  setmark p2

 )

 // postlude: walks the word and turns the marker letters 'Y' and 'I' back
 // into 'y' and 'i'; every other character is skipped with `next`.
 define postlude as repeat (

    [substring] among(
        'Y'  (<- 'y')
        'I'  (<- 'i')
        ''   (next)
    ) //or next

 )

 // Suffix routines; everything here runs from the end of the word backwards.
 backwardmode (

    // R1 / R2 succeed when the cursor is at or after the mark p1 / p2
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    // undouble: if the word ends in 'kk', 'dd' or 'tt', delete its last
    // character (the `test` restores the cursor before `[next] delete`).
    define undouble as (
        test among('kk' 'dd' 'tt') [next] delete
    )

    // e_ending: deletes a final 'e' when the R1 test succeeds and the 'e' is
    // preceded by a non-vowel, records the removal in e_found, then calls
    // undouble. e_found is cleared first, so it stays false when the routine
    // fails before reaching `set`.
    define e_ending as (
        unset e_found
        ['e'] R1 test non-v delete
        set e_found
        undouble
    )

    // en_ending: deletes the current slice when the R1 test succeeds, the
    // slice is preceded by a non-vowel, and it is not preceded by 'gem';
    // then calls undouble. The caller brackets the slice.
    define en_ending as (
        R1 non-v and not 'gem' delete
        undouble
    )

    // standard_suffix: the complete suffix-removal sequence. Each step is
    // wrapped in `do`, so a failing step does not prevent the later ones.
    define standard_suffix as (
        // 'heden' -> 'heid'; 'en'/'ene' via en_ending; 's'/'se' deleted when
        // preceded by a character outside v_j
        do (
            [substring] among(
                'heden'
                (   R1 <- 'heid'
                )
                'en' 'ene'
                (   en_ending
                )
                's' 'se'
                (   R1 non-v_j delete
                )
            )
        )
        do e_ending

        // 'heid' is deleted when the R2 test succeeds and it is not preceded
        // by 'c'; a preceding 'en' is then passed to en_ending
        do ( ['heid'] R2 not 'c' delete
             ['en'] en_ending
           )

        // derivational endings, each guarded by the R2 test
        do (
            [substring] among(
                'end' 'ing'
                (   R2 delete
                    (['ig'] R2 not 'e' delete) or undouble
                )
                'ig'
                (   R2 not 'e' delete
                )
                'lijk'
                (   R2 delete e_ending
                )
                'baar'
                (   R2 delete
                )
                // 'bar' is removed only if e_found is currently set
                'bar'
                (   R2 e_found delete
                )
            )
        )
        // word ends in: non-vowel, doubled vowel (aa/ee/oo/uu), then a final
        // character outside v_I -> one character of the doubled vowel is
        // deleted
        do (
            non-v_I
            test (
                among ('aa' 'ee' 'oo' 'uu')
                non-v
            )
            [next] delete
        )
    )
 )

 // stem: external entry point of the stemmer.
 //   1. prelude         - forward normalisation of the input
 //   2. mark_regions    - forward computation of the marks p1 and p2
 //   3. standard_suffix - suffix removal, run from the end of the word
 //   4. postlude        - forward pass restoring the marker letters
 // Every stage is wrapped in `do`, so a failing stage does not abort the rest.
 define stem as (
    do prelude
    do mark_regions
    backwards ( do standard_suffix )
    do postlude
 )
--- a/contrib/snowball/algorithms/english/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/english/stem_ISO_8859_1.sbl
--- a/contrib/snowball/algorithms/finnish/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/finnish/stem_ISO_8859_1.sbl
@@ -24,16 +24,17 @@ externals ( stem )
 integers ( p1 p2 )
 strings ( x )
 booleans ( ending_removed )
 groupings ( AEI V1 V2 particle_end )
 groupings ( AEI C V1 V2 particle_end )

 stringescapes {}

 /* special characters (in ISO Latin I) */
 /* special characters */

 stringdef a"   hex 'E4'
 stringdef o"   hex 'F6'
 stringdef a"   '{U+00E4}'
 stringdef o"   '{U+00F6}'

 define AEI 'a{a"}ei'
 define C 'bcdfghjklmnpqrstvwxz'
 define V1 'aeiouy{a"}{o"}'
 define V2 'aeiou{a"}{o"}'
 define particle_end V1 + 'nt'
@@ -116,7 +117,7 @@ backwardmode (
                )

            'a' '{a"}'              //-.
                     (V1 non-V1)    // |
                     (V1 C)         // |
            'tta' 'tt{a"}'          // Partitive  [32]
                     ('e')          // |
            'ta' 't{a"}'            //-'
@@ -172,11 +173,11 @@ backwardmode (
    define tidy as (
        setlimit tomark p1 for (
            do ( LONG and ([next] delete ) ) // undouble vowel
            do ( [AEI] non-V1 delete ) // remove trailing a, a", e, i
            do ( [AEI] C delete ) // remove trailing a, a", e, i
            do ( ['j'] 'o' or 'u' delete )
            do ( ['o'] 'j' delete )
        )
        goto non-V1 [next] -> x  x delete // undouble consonant
        goto non-V1 [C] -> x  x delete // undouble consonant
    )
 )

--- a/contrib/snowball/algorithms/french/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/french/stem_ISO_8859_1.sbl
@@ -17,21 +17,21 @@ groupings ( v keep_with_s )

 stringescapes {}

 /* special characters (in ISO Latin I) */

 stringdef a^   hex 'E2'  // a-circumflex
 stringdef a`   hex 'E0'  // a-grave
 stringdef c,   hex 'E7'  // c-cedilla

 stringdef e"   hex 'EB'  // e-diaeresis (rare)
 stringdef e'   hex 'E9'  // e-acute
 stringdef e^   hex 'EA'  // e-circumflex
 stringdef e`   hex 'E8'  // e-grave
 stringdef i"   hex 'EF'  // i-diaeresis
 stringdef i^   hex 'EE'  // i-circumflex
 stringdef o^   hex 'F4'  // o-circumflex
 stringdef u^   hex 'FB'  // u-circumflex
 stringdef u`   hex 'F9'  // u-grave
 /* special characters */

 stringdef a^   '{U+00E2}'  // a-circumflex
 stringdef a`   '{U+00E0}'  // a-grave
 stringdef c,   '{U+00E7}'  // c-cedilla

 stringdef e"   '{U+00EB}'  // e-diaeresis (rare)
 stringdef e'   '{U+00E9}'  // e-acute
 stringdef e^   '{U+00EA}'  // e-circumflex
 stringdef e`   '{U+00E8}'  // e-grave
 stringdef i"   '{U+00EF}'  // i-diaeresis
 stringdef i^   '{U+00EE}'  // i-circumflex
 stringdef o^   '{U+00F4}'  // o-circumflex
 stringdef u^   '{U+00FB}'  // u-circumflex
 stringdef u`   '{U+00F9}'  // u-grave

 define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'

@@ -42,6 +42,10 @@ define prelude as repeat goto (
           ('y' ] <- 'Y')
    )
    or
    (  [ '{e"}' ] <- 'He' )
    or
    (  [ '{i"}' ] <- 'Hi' )
    or
    (  ['y'] v <- 'Y' )
    or
    (  'q' ['u'] <- 'U' )
@@ -78,6 +82,9 @@ define postlude as repeat (
        'I' (<- 'i')
        'U' (<- 'u')
        'Y' (<- 'y')
        'He' (<- '{e"}')
        'Hi' (<- '{i"}')
        'H' (delete)
        ''  (next)
    )
 )
@@ -167,7 +174,7 @@ backwardmode (
            'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
            'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
            'issez' 'issiez' 'issions' 'issons' 'it'
                (non-v delete)
                (not 'H' non-v delete)
        )
    )

@@ -196,14 +203,13 @@ backwardmode (
    define keep_with_s 'aiou{e`}s'

    define residual_suffix as (
        try(['s'] test non-keep_with_s delete)
        try(['s'] test ('Hi' or non-keep_with_s) delete)
        setlimit tomark pV for (
            [substring] among(
                'ion'           (R2 's' or 't' delete)
                'ier' 'i{e`}re'
                'Ier' 'I{e`}re' (<-'i')
                'e'             (delete)
                '{e"}'          ('gu' delete)
            )
        )
    )
--- a/contrib/snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl
@@ -1,239 +0,0 @@
 routines (
           prelude postlude mark_regions
           RV R1 R2
           standard_suffix
           i_verb_suffix
           verb_suffix
           residual_suffix
           un_double
           un_accent
 )

 externals ( stem )

 integers ( pV p1 p2 )

 groupings ( v keep_with_s )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef a^   hex '83'  // a-circumflex
 stringdef a`   hex '85'  // a-grave
 stringdef c,   hex '87'  // c-cedilla

 stringdef e"   hex '89'  // e-diaeresis (rare)
 stringdef e'   hex '82'  // e-acute
 stringdef e^   hex '88'  // e-circumflex
 stringdef e`   hex '8A'  // e-grave
 stringdef i"   hex '8B'  // i-diaeresis
 stringdef i^   hex '8C'  // i-circumflex
 stringdef o^   hex '93'  // o-circumflex
 stringdef u^   hex '96'  // u-circumflex
 stringdef u`   hex '97'  // u-grave

 define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'

 // prelude: scans the word repeatedly and upper-cases letters that must not
 // be treated as vowels later on:
 //   - 'u' or 'i' with a vowel on both sides -> 'U' / 'I'
 //   - 'y' preceded by a vowel, or followed by a vowel -> 'Y'
 //   - 'u' directly after 'q' -> 'U'
 define prelude as repeat goto (

    (  v [ ('u' ] v <- 'U') or
           ('i' ] v <- 'I') or
           ('y' ] <- 'Y')
    )
    or
    (  ['y'] v <- 'Y' )
    or
    (  'q' ['u'] <- 'U' )
 )

 // mark_regions: computes the marks pV, p1 and p2 that the RV, R1 and R2
 // tests compare against the cursor.
 define mark_regions as (

    $pV = limit
    $p1 = limit
    $p2 = limit  // defaults

    // pV: if the word starts with two vowels, just after its third
    // character; otherwise just after the first vowel found once the first
    // character has been skipped
    do (
        ( v v next ) or ( next gopast v )
        setmark pV
    )
    // p1: just after the first non-vowel following a vowel;
    // p2: the same rule applied again from p1
    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
 )

 // postlude: walks the word and turns the marker letters 'I', 'U' and 'Y'
 // back into lower case; every other character is skipped with `next`.
 define postlude as repeat (

    [substring] among(
        'I' (<- 'i')
        'U' (<- 'u')
        'Y' (<- 'y')
        ''  (next)
    )
 )

 // Suffix routines; everything here runs from the end of the word backwards.
 backwardmode (

    // RV / R1 / R2 succeed when the cursor is at or after pV / p1 / p2
    define RV as $pV <= cursor
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    // standard_suffix: the longest listed ending is bracketed and the action
    // group that follows it is executed. Most actions are guarded by a
    // region test (R1, R2 or RV), so the routine fails when that test fails.
    define standard_suffix as (
        [substring] among(

            'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
            'ances' 'iqUes' 'ismes' 'ables' 'istes'
               ( R2 delete )
            'atrice' 'ateur' 'ation'
            'atrices' 'ateurs' 'ations'
               ( R2 delete
                 try ( ['ic'] (R2 delete) or <-'iqU' )
               )
            'logie'
            'logies'
               ( R2 <- 'log' )
            'usion' 'ution'
            'usions' 'utions'
               ( R2 <- 'u' )
            'ence'
            'ences'
               ( R2 <- 'ent' )
            'ement'
            'ements'
            (
                RV delete
                try (
                    [substring] among(
                        'iv'   (R2 delete ['at'] R2 delete)
                        'eus'  ((R2 delete) or (R1<-'eux'))
                        'abl' 'iqU'
                               (R2 delete)
                        'i{e`}r' 'I{e`}r'      //)
                               (RV <-'i')      //)--new 2 Sept 02
                    )
                )
            )
            'it{e'}'
            'it{e'}s'
            (
                R2 delete
                try (
                    [substring] among(
                        'abil' ((R2 delete) or <-'abl')
                        'ic'   ((R2 delete) or <-'iqU')
                        'iv'   (R2 delete)
                    )
                )
            )
            'if' 'ive'
            'ifs' 'ives'
            (
                R2 delete
                try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
            )
            'eaux' (<- 'eau')
            'aux'  (R1 <- 'al')
            'euse'
            'euses'((R2 delete) or (R1<-'eux'))

            'issement'
            'issements'(R1 non-v delete) // verbal

            // fail(...) below forces entry to verb_suffix. -ment typically
            // follows the p.p., e.g 'confus{e'}ment'.

            'amment'   (RV fail(<- 'ant'))
            'emment'   (RV fail(<- 'ent'))
            'ment'
            'ments'    (test(v RV) fail(delete))
                       // v is e,i,u,{e'},I or U
        )
    )

    // i_verb_suffix: within the pV limit, deletes the longest listed ending
    // provided it is preceded by a non-vowel.
    define i_verb_suffix as setlimit tomark pV for (
        [substring] among (
            '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
            'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
            'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
            'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
            'issez' 'issiez' 'issions' 'issons' 'it'
                (non-v delete)
        )
    )

    // verb_suffix: within the pV limit, removes the remaining verb endings.
    // 'ions' additionally needs the R2 test; the last group also removes an
    // 'e' left in front of the deleted ending.
    define verb_suffix as setlimit tomark pV for (
        [substring] among (
            'ions'
                (R2 delete)

            '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
            'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
            'erons' 'eront' 'ez' 'iez'

            // 'ons' //-best omitted

                (delete)

            '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
            'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
            'assions'
                (delete
                 try(['e'] delete)
                )
        )
    )

    // characters after which a final 's' is kept by residual_suffix
    define keep_with_s 'aiou{e`}s'

    // residual_suffix: drops a final 's' unless the character before it is in
    // keep_with_s, then handles a few remaining endings within the pV limit.
    define residual_suffix as (
        try(['s'] test non-keep_with_s delete)
        setlimit tomark pV for (
            [substring] among(
                'ion'           (R2 's' or 't' delete)
                'ier' 'i{e`}re'
                'Ier' 'I{e`}re' (<-'i')
                'e'             (delete)
                '{e"}'          ('gu' delete)
            )
        )
    )

    // un_double: if the word ends in one of the listed strings, delete its
    // last character (the `test` restores the cursor before `[next] delete`).
    define un_double as (
        test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
    )

    // un_accent: skips at least one trailing non-vowel, then replaces an
    // '{e'}' or '{e`}' found immediately before them with 'e'.
    define un_accent as (
        atleast 1 non-v
        [ '{e'}' or '{e`}' ] <-'e'
    )
 )

 // stem: external entry point. Forward passes (prelude, mark_regions) are
 // followed by suffix removal from the end of the word and a forward
 // postlude. Each stage is wrapped in `do`, so a failure does not abort
 // the stages after it.
 define stem as (

    do prelude
    do mark_regions
    backwards (

        // The three suffix routines are tried in order and the first one to
        // succeed wins; after a success a final 'Y' becomes 'i' or a final
        // '{c,}' becomes 'c'. residual_suffix runs only when all three fail.
        do (
            (
                 ( standard_suffix or
                   i_verb_suffix or
                   verb_suffix
                 )
                 and
                 try( [ ('Y'   ] <- 'i' ) or
                        ('{c,}'] <- 'c' )
                 )
            ) or
            residual_suffix
        )

        // try(['ent'] RV delete) // is best omitted

        do un_double
        do un_accent
    )
    do postlude
 )

--- a/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl
@@ -18,12 +18,12 @@ groupings ( v s_ending st_ending )

 stringescapes {}

 /* special characters (in ISO Latin I) */
 /* special characters */

 stringdef a"   hex 'E4'
 stringdef o"   hex 'F6'
 stringdef u"   hex 'FC'
 stringdef ss   hex 'DF'
 stringdef a"   '{U+00E4}'
 stringdef o"   '{U+00F6}'
 stringdef u"   '{U+00FC}'
 stringdef ss   '{U+00DF}'

 define v 'aeiouy{a"}{o"}{u"}'

--- a/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl
@@ -1,139 +0,0 @@

 /*
    Extra rule for -nisse ending added 11 Dec 2009
 */

 routines (
           prelude postlude
           mark_regions
           R1 R2
           standard_suffix
 )

 externals ( stem )

 integers ( p1 p2 x )

 groupings ( v s_ending st_ending )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef a"   hex '84'
 stringdef o"   hex '94'
 stringdef u"   hex '81'
 stringdef ss   hex 'E1'

 define v 'aeiouy{a"}{o"}{u"}'

 define s_ending  'bdfghklmnrt'
 define st_ending s_ending - 'r'

 // prelude: normalises the word before the regions are marked.
 define prelude as (

    // Pass 1: replace every '{ss}' by 'ss'; other characters are skipped.
    // The `test` puts the cursor back at the start afterwards.
    test repeat (
        (
            ['{ss}'] <- 'ss'
        ) or next
    )

    // Pass 2: 'u' and 'y' with a vowel on both sides become 'U' and 'Y'
    repeat goto (
        v [('u'] v <- 'U') or
           ('y'] v <- 'Y')
    )
 )

 // mark_regions: computes the marks p1 and p2 that the R1 and R2 tests
 // compare against the cursor.
 define mark_regions as (

    // defaults: both marks at the end of the word
    $p1 = limit
    $p2 = limit

    // x marks the position 3 characters in; `test` restores the cursor.
    // If hop 3 fails, the routine fails and the defaults stay in place.
    test(hop 3 setmark x)

    // p1: just after the first non-vowel that follows a vowel
    gopast v  gopast non-v  setmark p1
    try($p1 < x  $p1 = x)  // at least 3
    // p2: the same rule applied again, continuing from the current cursor
    gopast v  gopast non-v  setmark p2

 )

 // postlude: walks the word, turning the marker letters 'Y' and 'U' back
 // into lower case and replacing '{a"}' '{o"}' '{u"}' by 'a' 'o' 'u';
 // every other character is skipped with `next`.
 define postlude as repeat (

    [substring] among(
        'Y'    (<- 'y')
        'U'    (<- 'u')
        '{a"}' (<- 'a')
        '{o"}' (<- 'o')
        '{u"}' (<- 'u')
        ''     (next)
    )

 )

 // Suffix routines; everything here runs from the end of the word backwards.
 backwardmode (

    // R1 / R2 succeed when the cursor is at or after the mark p1 / p2
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    // standard_suffix: three independent steps, each wrapped in `do` so that
    // a failing step does not prevent the following ones.
    define standard_suffix as (
        // Step 1: endings guarded by the R1 test. After 'e' 'en' 'es' a
        // remaining 's' preceded by 'nis' is also removed; a bare 's' is
        // deleted only after a character in s_ending.
        do (
            [substring] R1 among(
                'em' 'ern' 'er'
                (   delete
                )
                'e' 'en' 'es'
                (   delete
                    try (['s'] 'nis' delete)
                )
                's'
                (   s_ending delete
                )
            )
        )
        // Step 2: endings guarded by the R1 test. 'st' is deleted only after
        // a character in st_ending that itself has at least 3 characters
        // before it (hop 3 must succeed).
        do (
            [substring] R1 among(
                'en' 'er' 'est'
                (   delete
                )
                'st'
                (   st_ending hop 3 delete
                )
            )
        )
        // Step 3: derivational endings guarded by the R2 test
        do (
            [substring] R2 among(
                'end' 'ung'
                (   delete
                    try (['ig'] not 'e' R2 delete)
                )
                'ig' 'ik' 'isch'
                (   not 'e' delete
                )
                'lich' 'heit'
                (   delete
                    try (
                        ['er' or 'en'] R1 delete
                    )
                )
                'keit'
                (   delete
                    try (
                        [substring] R2 among(
                            'lich' 'ig'
                            (   delete
                            )
                        )
                    )
                )
            )
        )
    )
 )

 // stem: external entry point of the stemmer.
 //   1. prelude         - forward normalisation of the input
 //   2. mark_regions    - forward computation of the marks p1 and p2
 //   3. standard_suffix - suffix removal, run from the end of the word
 //   4. postlude        - forward pass restoring the marker letters
 // Every stage is wrapped in `do`, so a failing stage does not abort the rest.
 define stem as (
    do prelude
    do mark_regions
    backwards ( do standard_suffix )
    do postlude
 )
--- a/contrib/snowball/algorithms/german2/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/german2/stem_ISO_8859_1.sbl
@@ -18,12 +18,12 @@ groupings ( v s_ending st_ending )

 stringescapes {}

 /* special characters (in ISO Latin I) */
 /* special characters */

 stringdef a"   hex 'E4'
 stringdef o"   hex 'F6'
 stringdef u"   hex 'FC'
 stringdef ss   hex 'DF'
 stringdef a"   '{U+00E4}'
 stringdef o"   '{U+00F6}'
 stringdef u"   '{U+00FC}'
 stringdef ss   '{U+00DF}'

 define v 'aeiouy{a"}{o"}{u"}'

--- a/contrib/snowball/algorithms/greek.sbl
+++ b/contrib/snowball/algorithms/greek.sbl
@@ -0,0 +1,706 @@
 // A stemmer for Modern Greek language, based on:
 //
 // Ntais, Georgios. Development of a Stemmer for the Greek
 // Language. Diss. Royal Institute of Technology, 2006.
 // https://sais.se/mthprize/2007/ntais2007.pdf
 //
 // Saroukos, Spyridon. Enhancing a Greek language stemmer.
 // University of Tampere, 2008.
 // https://tampub.uta.fi/bitstream/handle/10024/80480/gradu03463.pdf

 stringescapes {}

 stringdef a    '{U+03B1}' // alpha
 stringdef v    '{U+03B2}' // beta
 stringdef g    '{U+03B3}' // gamma
 stringdef d    '{U+03B4}' // delta
 stringdef e    '{U+03B5}' // epsilon
 stringdef z    '{U+03B6}' // zeta
 stringdef i    '{U+03B7}' // eta
 stringdef th   '{U+03B8}' // theta
 stringdef y    '{U+03B9}' // iota
 stringdef k    '{U+03BA}' // kappa
 stringdef l    '{U+03BB}' // lamda
 stringdef m    '{U+03BC}' // mu
 stringdef n    '{U+03BD}' // nu
 stringdef x    '{U+03BE}' // xi
 stringdef o    '{U+03BF}' // omicron
 stringdef p    '{U+03C0}' // pi
 stringdef r    '{U+03C1}' // rho
 stringdef ss   '{U+03C2}' // sigma final
 stringdef s    '{U+03C3}' // sigma
 stringdef t    '{U+03C4}' // tau
 stringdef u    '{U+03C5}' // upsilon
 stringdef f    '{U+03C6}' // phi
 stringdef ch   '{U+03C7}' // chi
 stringdef ps   '{U+03C8}' // psi
 stringdef oo   '{U+03C9}' // omega

 stringdef A    '{U+0391}' // Alpha
 stringdef V    '{U+0392}' // Beta
 stringdef G    '{U+0393}' // Gamma
 stringdef D    '{U+0394}' // Delta
 stringdef E    '{U+0395}' // Epsilon
 stringdef Z    '{U+0396}' // Zeta
 stringdef I    '{U+0397}' // Eta
 stringdef Th   '{U+0398}' // Theta
 stringdef Y    '{U+0399}' // Iota
 stringdef K    '{U+039A}' // Kappa
 stringdef L    '{U+039B}' // Lamda
 stringdef M    '{U+039C}' // Mu
 stringdef N    '{U+039D}' // Nu
 stringdef X    '{U+039E}' // Xi
 stringdef O    '{U+039F}' // Omicron
 stringdef P    '{U+03A0}' // Pi
 stringdef R    '{U+03A1}' // Rho
 stringdef S    '{U+03A3}' // Sigma
 stringdef T    '{U+03A4}' // Tau
 stringdef U    '{U+03A5}' // Upsilon
 stringdef F    '{U+03A6}' // Phi
 stringdef Ch   '{U+03A7}' // Chi
 stringdef Ps   '{U+03A8}' // Psi
 stringdef Oo   '{U+03A9}' // Omega

 stringdef Y:   '{U+03AA}' // Iota with dialytika
 stringdef U:   '{U+03AB}' // Upsilon with dialytika

 stringdef a'   '{U+03AC}' // alpha with tonos
 stringdef e'   '{U+03AD}' // epsilon with tonos
 stringdef i'   '{U+03AE}' // eta with tonos
 stringdef y'   '{U+03AF}' // iota with tonos
 stringdef o'   '{U+03CC}' // omicron with tonos
 stringdef u'   '{U+03CD}' // upsilon with tonos
 stringdef oo'  '{U+03CE}' // omega with tonos

 stringdef i:'  '{U+0390}' // iota with dialytika and tonos
 stringdef u:'  '{U+03B0}' // upsilon with dialytika and tonos

 stringdef i:   '{U+03CA}' // iota with dialytika
 stringdef u:   '{U+03CB}' // upsilon with dialytika

 stringdef A'   '{U+0386}' // Alpha with tonos
 stringdef E'   '{U+0388}' // Epsilon with tonos
 stringdef I'   '{U+0389}' // Eta with tonos
 stringdef Y'   '{U+038A}' // Iota with tonos
 stringdef O'   '{U+038C}' // Omicron with tonos
 stringdef U'   '{U+038E}' // Upsilon with tonos
 stringdef OO'  '{U+038F}' // Omega with tonos

 externals ( stem )

 booleans ( test1 )

 groupings ( v v2 )

 routines ( tolower has_min_length
           steps1 steps2 steps3 steps4 steps5 steps6 steps7
           steps8 steps9 steps10
           step1 step2a step2b step2c step2d step3 step4
           step5a step5b step5c step5d step5e step5f
           step5g step5h step5i
           step5j step5k step5l step5m
           step6 step7 )

 define v '{a}{e}{i}{y}{o}{u}{oo}'
 define v2 '{a}{e}{i}{y}{o}{oo}'

 backwardmode (
  // has_min_length: succeeds only when the word length (len) is at least 3.
  define has_min_length as (
    $(len >= 3)
  )

  // tolower: normalises the word one character at a time (this routine is
  // inside backwardmode, so the scan runs from the end of the word):
  //   - capital letters are replaced by the corresponding small letters;
  //   - letters carrying tonos and/or dialytika, capital or small, are
  //     replaced by the plain small letter;
  //   - final sigma {ss} is replaced by sigma {s};
  //   - any other character is skipped by the empty-string entry.
  define tolower as (
    repeat (
      [substring] among (
        '{A}' (<- '{a}')
        '{V}' (<- '{v}')
        '{G}' (<- '{g}')
        '{D}' (<- '{d}')
        '{E}' (<- '{e}')
        '{Z}' (<- '{z}')
        '{I}' (<- '{i}')
        '{Th}' (<- '{th}')
        '{Y}' (<- '{y}')
        '{K}' (<- '{k}')
        '{L}' (<- '{l}')
        '{M}' (<- '{m}')
        '{N}' (<- '{n}')
        '{X}' (<- '{x}')
        '{O}' (<- '{o}')
        '{P}' (<- '{p}')
        '{R}' (<- '{r}')
        '{S}' (<- '{s}')
        '{T}' (<- '{t}')
        '{U}' (<- '{u}')
        '{F}' (<- '{f}')
        '{Ch}' (<- '{ch}')
        '{Ps}' (<- '{ps}')
        '{Oo}' (<- '{oo}')
        '{Y:}' (<- '{y}')
        '{U:}' (<- '{u}')
        '{a'}' (<- '{a}')
        '{e'}' (<- '{e}')
        '{i'}' (<- '{i}')
        '{y'}' (<- '{y}')
        '{o'}' (<- '{o}')
        '{u'}' (<- '{u}')
        '{oo'}' (<- '{oo}')
        '{i:'}' (<- '{i}')
        '{u:'}' (<- '{u}')
        '{i:}' (<- '{i}')
        '{u:}' (<- '{u}')
        '{A'}' (<- '{a}')
        '{E'}' (<- '{e}')
        '{I'}' (<- '{i}')
        '{Y'}' (<- '{y}')
        '{O'}' (<- '{o}')
        '{U'}' (<- '{u}')
        '{OO'}' (<- '{oo}')
        '{ss}' (<- '{s}')
         '' (next)
      )
    )
  )

  // step1: exception list. When the word ends in one of the listed forms,
  // the whole matched ending is replaced by the fixed stem given on the same
  // line, and the test1 flag is cleared. The routine fails, and test1 is
  // left untouched, when no entry matches.
  define step1 as (
    [substring] among (
      '{f}{a}{g}{y}{a}' '{f}{a}{g}{y}{o}{u}' '{f}{a}{g}{y}{oo}{n}' (<- '{f}{a}')
      '{s}{k}{a}{g}{y}{a}' '{s}{k}{a}{g}{y}{o}{u}' '{s}{k}{a}{g}{y}{oo}{n}' (<- '{s}{k}{a}')
      '{o}{l}{o}{g}{y}{o}{u}' '{o}{l}{o}{g}{y}{a}' '{o}{l}{o}{g}{y}{oo}{n}' (<- '{o}{l}{o}')
      '{s}{o}{g}{y}{o}{u}' '{s}{o}{g}{y}{a}' '{s}{o}{g}{y}{oo}{n}' (<- '{s}{o}')
      '{t}{a}{t}{o}{g}{y}{a}' '{t}{a}{t}{o}{g}{y}{o}{u}' '{t}{a}{t}{o}{g}{y}{oo}{n}' (<- '{t}{a}{t}{o}')
      '{k}{r}{e}{a}{s}' '{k}{r}{e}{a}{t}{o}{s}' '{k}{r}{e}{a}{t}{a}' '{k}{r}{e}{a}{t}{oo}{n}' (<- '{k}{r}{e}')
      '{p}{e}{r}{a}{s}' '{p}{e}{r}{a}{t}{o}{s}' '{p}{e}{r}{a}{t}{i}' '{p}{e}{r}{a}{t}{a}' '{p}{e}{r}{a}{t}{oo}{n}' (<- '{p}{e}{r}')
      '{t}{e}{r}{a}{s}' '{t}{e}{r}{a}{t}{o}{s}' '{t}{e}{r}{a}{t}{a}' '{t}{e}{r}{a}{t}{oo}{n}' (<- '{t}{e}{r}')
      '{f}{oo}{s}' '{f}{oo}{t}{o}{s}' '{f}{oo}{t}{a}' '{f}{oo}{t}{oo}{n}' (<- '{f}{oo}')
      '{k}{a}{th}{e}{s}{t}{oo}{s}' '{k}{a}{th}{e}{s}{t}{oo}{t}{o}{s}' '{k}{a}{th}{e}{s}{t}{oo}{t}{a}' '{k}{a}{th}{e}{s}{t}{oo}{t}{oo}{n}' (<- '{k}{a}{th}{e}{s}{t}')
      '{g}{e}{g}{o}{n}{o}{s}' '{g}{e}{g}{o}{n}{o}{t}{o}{s}' '{g}{e}{g}{o}{n}{o}{t}{a}' '{g}{e}{g}{o}{n}{o}{t}{oo}{n}' (<- '{g}{e}{g}{o}{n}')
    )
    unset test1
  )

  // steps1: removes an ending that starts with {y}{z} and clears test1.
  // Afterwards, `[]` brackets the empty slice at the point of removal and
  // the remaining word is compared, as a whole (`atlimit`), with two lists:
  //   - first list:  {y} is inserted at the point of removal;
  //   - second list: {y}{z} is inserted instead.
  // NOTE(review): when neither list matches, the `or` group fails and so
  // does the routine, although the ending has already been deleted.
  define steps1 as (
    [substring] among (
      '{y}{z}{a}' '{y}{z}{e}{s}' '{y}{z}{e}' '{y}{z}{a}{m}{e}' '{y}{z}{a}{t}{e}' '{y}{z}{a}{n}' '{y}{z}{a}{n}{e}' '{y}{z}{oo}' '{y}{z}{e}{y}{s}' '{y}{z}{e}{y}'
      '{y}{z}{o}{u}{m}{e}' '{y}{z}{e}{t}{e}' '{y}{z}{o}{u}{n}' '{y}{z}{o}{u}{n}{e}' (
        delete
        unset test1
        ([] substring atlimit among (
          '{a}{n}{a}{m}{p}{a}' '{e}{m}{p}{a}' '{e}{p}{a}' '{x}{a}{n}{a}{p}{a}' '{p}{a}' '{p}{e}{r}{y}{p}{a}' '{a}{th}{r}{o}' '{s}{u}{n}{a}{th}{r}{o}' '{d}{a}{n}{e}'
          (<- '{y}')
        )) or
        ([] substring atlimit among (
          '{m}{a}{r}{k}' '{k}{o}{r}{n}' '{a}{m}{p}{a}{r}' '{a}{r}{r}' '{v}{a}{th}{u}{r}{y}' '{v}{a}{r}{k}' '{v}' '{v}{o}{l}{v}{o}{r}' '{g}{k}{r}'
          '{g}{l}{u}{k}{o}{r}' '{g}{l}{u}{k}{u}{r}' '{y}{m}{p}' '{l}' '{l}{o}{u}' '{m}{a}{r}' '{m}' '{p}{r}' '{m}{p}{r}' '{p}{o}{l}{u}{r}' '{p}'
          '{r}' '{p}{y}{p}{e}{r}{o}{r}'
          (<- '{y}{z}')
        ))
      )
    )
  )

  // steps2: removes an ending that starts with {oo}{th}{i}{k} and clears
  // test1. If the remaining word is, as a whole (`atlimit`), one of the
  // listed stems, {oo}{n} is inserted at the point of removal.
  define steps2 as (
    [substring] among (
      '{oo}{th}{i}{k}{a}' '{oo}{th}{i}{k}{e}{s}' '{oo}{th}{i}{k}{e}' '{oo}{th}{i}{k}{a}{m}{e}' '{oo}{th}{i}{k}{a}{t}{e}' '{oo}{th}{i}{k}{a}{n}' '{oo}{th}{i}{k}{a}{n}{e}' (
        delete
        unset test1
        [] substring atlimit among (
          '{a}{l}' '{v}{y}' '{e}{n}' '{u}{ps}' '{l}{y}' '{z}{oo}' '{s}' '{ch}' (<- '{oo}{n}')
        )
      )
    )
  )

  // steps3: removes an ending that starts with {y}{s} and clears test1.
  // Then, in order:
  //   - if the remaining word is exactly {y}{s}{a}, the current slice is
  //     replaced by {y}{s};
  //   - else if it is, as a whole, in the first list, {y} is inserted at
  //     the point of removal;
  //   - else if it is, as a whole, in the second list, {y}{s} is inserted.
  define steps3 as (
    [substring] among (
      '{y}{s}{a}' '{y}{s}{e}{s}' '{y}{s}{e}' '{y}{s}{a}{m}{e}' '{y}{s}{a}{t}{e}' '{y}{s}{a}{n}' '{y}{s}{a}{n}{e}' (
        delete
        unset test1
        ('{y}{s}{a}' atlimit <- '{y}{s}') or
        ([] substring atlimit among (
          '{a}{n}{a}{m}{p}{a}' '{a}{th}{r}{o}' '{e}{m}{p}{a}' '{e}{s}{e}' '{e}{s}{oo}{k}{l}{e}' '{e}{p}{a}' '{x}{a}{n}{a}{p}{a}' '{e}{p}{e}' '{p}{e}{r}{y}{p}{a}'
          '{s}{u}{n}{a}{th}{r}{o}' '{d}{a}{n}{e}' '{k}{l}{e}' '{ch}{a}{r}{t}{o}{p}{a}' '{e}{x}{a}{r}{ch}{a}' '{m}{e}{t}{e}{p}{e}' '{a}{p}{o}{k}{l}{e}'
          '{a}{p}{e}{k}{l}{e}' '{e}{k}{l}{e}' '{p}{e}'
          (<- '{y}')
        )) or
        ([] substring atlimit among (
          '{a}{n}' '{a}{f}' '{g}{e}' '{g}{y}{g}{a}{n}{t}{o}{a}{f}' '{g}{k}{e}' '{d}{i}{m}{o}{k}{r}{a}{t}' '{k}{o}{m}' '{g}{k}' '{m}' '{p}'
          '{p}{o}{u}{k}{a}{m}' '{o}{l}{o}' '{l}{a}{r}'
          (<- '{y}{s}')
        ))
      )
    )
  )

  // steps4: removes an ending that starts with {y}{s} (the forms listed
  // below) and clears test1. If the remaining word is, as a whole
  // (`atlimit`), one of the listed stems, {y} is inserted at the point of
  // removal.
  define steps4 as (
    [substring] among (
      '{y}{s}{oo}' '{y}{s}{e}{y}{s}' '{y}{s}{e}{y}' '{y}{s}{o}{u}{m}{e}' '{y}{s}{e}{t}{e}' '{y}{s}{o}{u}{n}' '{y}{s}{o}{u}{n}{e}' (
        delete
        unset test1
        [] substring atlimit among (
          '{a}{n}{a}{m}{p}{a}' '{e}{m}{p}{a}' '{e}{s}{e}' '{e}{s}{oo}{k}{l}{e}' '{e}{p}{a}' '{x}{a}{n}{a}{p}{a}' '{e}{p}{e}' '{p}{e}{r}{y}{p}{a}' '{a}{th}{r}{o}'
          '{s}{u}{n}{a}{th}{r}{o}' '{d}{a}{n}{e}' '{k}{l}{e}' '{ch}{a}{r}{t}{o}{p}{a}' '{e}{x}{a}{r}{ch}{a}' '{m}{e}{t}{e}{p}{e}' '{a}{p}{o}{k}{l}{e}' '{a}{p}{e}{k}{l}{e}'
          '{e}{k}{l}{e}' '{p}{e}'
          (<- '{y}')
        )
      )
    )
  )

  // steps5: removes an ending that starts with {y}{s}{t} and clears test1.
  // The remaining word is then compared, as a whole (`atlimit`), with two
  // lists:
  //   - first list:  {y} is inserted at the point of removal;
  //   - second list: {y}{s}{t} is inserted instead.
  define steps5 as (
    [substring] among (
      '{y}{s}{t}{o}{s}' '{y}{s}{t}{o}{u}' '{y}{s}{t}{o}' '{y}{s}{t}{e}' '{y}{s}{t}{o}{y}' '{y}{s}{t}{oo}{n}' '{y}{s}{t}{o}{u}{s}' '{y}{s}{t}{i}' '{y}{s}{t}{i}{s}'
      '{y}{s}{t}{a}' '{y}{s}{t}{e}{s}' (
        delete
        unset test1
        ([] substring atlimit among (
          '{d}{a}{n}{e}' '{s}{u}{n}{a}{th}{r}{o}' '{k}{l}{e}' '{s}{e}' '{e}{s}{oo}{k}{l}{e}' '{a}{s}{e}' '{p}{l}{e}'
          (<- '{y}')
        )) or
        ([] substring atlimit among (
          '{m}' '{p}' '{a}{p}' '{a}{r}' '{i}{d}' '{k}{t}' '{s}{k}' '{s}{ch}' '{u}{ps}' '{f}{a}' '{ch}{r}' '{ch}{t}' '{a}{k}{t}'
          '{a}{o}{r}' '{a}{s}{ch}' '{a}{t}{a}' '{a}{ch}{n}' '{a}{ch}{t}' '{g}{e}{m}' '{g}{u}{r}' '{e}{m}{p}' '{e}{u}{p}' '{e}{ch}{th}' '{i}{f}{a}'
          '{k}{a}{th}' '{k}{a}{k}' '{k}{u}{l}' '{l}{u}{g}' '{m}{a}{k}' '{m}{e}{g}' '{t}{a}{ch}' '{f}{y}{l}' '{ch}{oo}{r}'
          (<- '{y}{s}{t}')
        ))
      )
    )
  )

  // steps6: removes an ending that starts with {y}{s}{m} and clears test1.
  // Then, in order:
  //   - remaining word is, as a whole, in the first list: {y}{s}{m} is
  //     inserted at the point of removal;
  //   - remaining word is, as a whole, in the second list: {y} is inserted;
  //   - otherwise, if the remaining word ends in one of the forms of the
  //     third list (no `atlimit` here), that ending is replaced by the
  //     shorter form given next to it.
  define steps6 as (
    [substring] among (
      '{y}{s}{m}{o}' '{y}{s}{m}{o}{y}' '{y}{s}{m}{o}{s}' '{y}{s}{m}{o}{u}' '{y}{s}{m}{o}{u}{s}' '{y}{s}{m}{oo}{n}' (
        delete
        unset test1
        ([] substring atlimit among (
          '{s}{e}' '{m}{e}{t}{a}{s}{e}' '{m}{y}{k}{r}{o}{s}{e}' '{e}{g}{k}{l}{e}' '{a}{p}{o}{k}{l}{e}'
          (<- '{y}{s}{m}')
        )) or
        ([] substring atlimit among (
          '{d}{a}{n}{e}' '{a}{n}{t}{y}{d}{a}{n}{e}'
          (<- '{y}')
        )) or
        ([substring] among (
          '{a}{g}{n}{oo}{s}{t}{y}{k}' (<- '{a}{g}{n}{oo}{s}{t}')
          '{a}{t}{o}{m}{y}{k}' (<- '{a}{t}{o}{m}')
          '{g}{n}{oo}{s}{t}{y}{k}' (<- '{g}{n}{oo}{s}{t}')
          '{e}{th}{n}{y}{k}' (<- '{e}{th}{n}')
          '{e}{k}{l}{e}{k}{t}{y}{k}' (<- '{e}{k}{l}{e}{k}{t}')
          '{s}{k}{e}{p}{t}{y}{k}' (<- '{s}{k}{e}{p}{t}')
          '{t}{o}{p}{y}{k}' (<- '{t}{o}{p}')
          '{a}{l}{e}{x}{a}{n}{d}{r}{y}{n}' (<- '{a}{l}{e}{x}{a}{n}{d}{r}')
          '{v}{u}{z}{a}{n}{t}{y}{n}' (<- '{v}{u}{z}{a}{n}{t}')
          '{th}{e}{a}{t}{r}{y}{n}' (<- '{th}{e}{a}{t}{r}')
        ))
      )
    )
  )

  // steps7: removes the endings {a}{r}{a}{k}{y}, {a}{r}{a}{k}{y}{a},
  // {o}{u}{d}{a}{k}{y}, {o}{u}{d}{a}{k}{y}{a} and clears test1. If the
  // remaining word is exactly {s} or {ch}, {a}{r}{a}{k} is inserted at the
  // point of removal.
  define steps7 as (
    [substring] among (
      '{a}{r}{a}{k}{y}' '{a}{r}{a}{k}{y}{a}' '{o}{u}{d}{a}{k}{y}' '{o}{u}{d}{a}{k}{y}{a}' (
        delete
        unset test1
        [] substring atlimit among (
         '{s}' '{ch}'
         (<- '{a}{r}{a}{k}')
        )
      )
    )
  )

  // steps8: removes one of the listed endings and clears test1. Then, in
  // order:
  //   - remaining word is, as a whole, in the first list: {a}{k} is
  //     inserted at the point of removal;
  //   - remaining word is, as a whole, in the second list: {y}{t}{s} is
  //     inserted;
  //   - remaining word merely ends in {k}{o}{r}: {y}{t}{s} is inserted.
  define steps8 as (
    [substring] among (
      '{a}{k}{y}' '{a}{k}{y}{a}' '{y}{t}{s}{a}' '{y}{t}{s}{a}{s}' '{y}{t}{s}{e}{s}' '{y}{t}{s}{oo}{n}' '{a}{r}{a}{k}{y}' '{a}{r}{a}{k}{y}{a}' (
        delete
        unset test1
        ([] substring atlimit among (
          '{v}{a}{m}{v}' '{v}{r}' '{k}{a}{y}{m}' '{k}{o}{n}' '{k}{o}{r}' '{l}{a}{v}{r}' '{l}{o}{u}{l}' '{m}{e}{r}' '{m}{o}{u}{s}{t}'
          '{n}{a}{g}{k}{a}{s}' '{p}{l}' '{r}' '{r}{u}' '{s}' '{s}{k}' '{s}{o}{k}' '{s}{p}{a}{n}' '{t}{z}' '{f}{a}{r}{m}' '{ch}' '{k}{a}{p}{a}{k}'
          '{a}{l}{y}{s}{f}' '{a}{m}{v}{r}' '{a}{n}{th}{r}' '{k}' '{f}{u}{l}' '{k}{a}{t}{r}{a}{p}' '{k}{l}{y}{m}' '{m}{a}{l}' '{s}{l}{o}{v}' '{f}'
          '{s}{f}' '{t}{s}{e}{ch}{o}{s}{l}{o}{v}'
           (<- '{a}{k}')
        )) or
        ([] substring atlimit among (
          '{v}' '{v}{a}{l}' '{g}{y}{a}{n}' '{g}{l}' '{z}' '{i}{g}{o}{u}{m}{e}{n}' '{k}{a}{r}{d}' '{k}{o}{n}' '{m}{a}{k}{r}{u}{n}' '{n}{u}{f}'
          '{p}{a}{t}{e}{r}' '{p}' '{s}{k}' '{t}{o}{s}' '{t}{r}{y}{p}{o}{l}'
          (<- '{y}{t}{s}')
        )) or
        ([] '{k}{o}{r}' <- '{y}{t}{s}')
      )
    )
  )

  // steps9: strip '{y}{d}{y}{o}', '{y}{d}{y}{a}' or '{y}{d}{y}{oo}{n}' and
  // clear test1.  '{y}{d}' is then appended again if either
  //   - the remainder is exactly (atlimit) one of the first list, or
  //   - the remainder ends with '{e}' or '{p}{a}{y}{ch}{n}' (no atlimit test).
  define steps9 as (
    [substring] among (
      '{y}{d}{y}{o}' '{y}{d}{y}{a}' '{y}{d}{y}{oo}{n}' (
        delete
        unset test1
        ([] substring atlimit among (
          '{a}{y}{f}{n}' '{y}{r}' '{o}{l}{o}' '{ps}{a}{l}' (<- '{y}{d}')
        )) or
        ([] substring among (
          '{e}' '{p}{a}{y}{ch}{n}' (<- '{y}{d}')
        ))
      )
    )
  )

  // steps10: drop a trailing '{y}{s}{k}' + ('{o}{s}' | '{o}{u}' | '{o}' | '{e}')
  // and clear test1.
  define steps10 as (
    [substring] among (
      '{y}{s}{k}{o}{s}'
      '{y}{s}{k}{o}{u}'
      '{y}{s}{k}{o}'
      '{y}{s}{k}{e}'
      (
        delete
        unset test1
        // If the whole remaining word is one of these, re-append '{y}{s}{k}'.
        [] substring atlimit among (
          '{d}'
          '{y}{v}'
          '{m}{i}{n}'
          '{r}'
          '{f}{r}{a}{g}{k}'
          '{l}{u}{k}'
          '{o}{v}{e}{l}'
          (<- '{y}{s}{k}')
        )
      )
    )
  )

  // step2a: strip a trailing '{a}{d}{e}{s}' or '{a}{d}{oo}{n}'.  Unless the
  // remainder ends with one of the strings in the second list (the `not`
  // test), '{a}{d}' is inserted back at the end of the word.
  // Unlike most steps in this file, this one does not touch test1.
  define step2a as (
    [substring] among (
      '{a}{d}{e}{s}' '{a}{d}{oo}{n}' (delete)
    )
    not ([substring] among (
      '{o}{k}' '{m}{a}{m}' '{m}{a}{n}' '{m}{p}{a}{m}{p}' '{p}{a}{t}{e}{r}' '{g}{y}{a}{g}{y}' '{n}{t}{a}{n}{t}' '{k}{u}{r}' '{th}{e}{y}' '{p}{e}{th}{e}{r}'
    ))
    insert '{a}{d}'
  )

  // step2b: drop a trailing '{e}{d}{e}{s}' or '{e}{d}{oo}{n}'; test1 is left
  // untouched by this step.
  define step2b as (
    [substring] among (
      '{e}{d}{e}{s}'
      '{e}{d}{oo}{n}'
      (delete)
    )
    // If what remains ends with one of these, append '{e}{d}' again.
    [] substring among (
      '{o}{p}'
      '{y}{p}'
      '{e}{m}{p}'
      '{u}{p}'
      '{g}{i}{p}'
      '{d}{a}{p}'
      '{k}{r}{a}{s}{p}'
      '{m}{y}{l}'
      (<- '{e}{d}')
    )
  )

  // step2c: strip a trailing '{o}{u}{d}{e}{s}' or '{o}{u}{d}{oo}{n}'.  If the
  // remainder ends with one of the listed strings (no atlimit test), append
  // '{o}{u}{d}' again.  test1 is not modified here.
  define step2c as (
    [substring] among (
      '{o}{u}{d}{e}{s}' '{o}{u}{d}{oo}{n}' (delete)
    )
    [] substring among (
      '{a}{r}{k}' '{k}{a}{l}{y}{a}{k}' '{p}{e}{t}{a}{l}' '{l}{y}{ch}' '{p}{l}{e}{x}' '{s}{k}' '{s}' '{f}{l}' '{f}{r}' '{v}{e}{l}' '{l}{o}{u}{l}' '{ch}{n}'
      '{s}{p}' '{t}{r}{a}{g}' '{f}{e}' (<- '{o}{u}{d}')
    )
  )

  // step2d: drop a trailing '{e}{oo}{s}' or '{e}{oo}{n}' and clear test1.
  define step2d as (
    [substring] among (
      '{e}{oo}{s}'
      '{e}{oo}{n}'
      (
        delete
        unset test1
      )
    )
    // If the whole remaining word is one of these, append '{e}' again.
    [] substring atlimit among (
      '{th}'
      '{d}'
      '{e}{l}'
      '{g}{a}{l}'
      '{n}'
      '{p}'
      '{y}{d}'
      '{p}{a}{r}'
      (<- '{e}')
    )
  )

  // step3: drop a trailing '{y}{a}', '{y}{o}{u}' or '{y}{oo}{n}' and clear
  // test1.
  define step3 as (
    [substring] among (
      '{y}{a}'
      '{y}{o}{u}'
      '{y}{oo}{n}'
      (
        delete
        unset test1
      )
    )
    // If the character now at the end of the word belongs to grouping v
    // (declared elsewhere in this file), append '{y}' again.
    (
      []
      v
      <- '{y}'
    )
  )

  // step4: strip a trailing '{y}{k}{a}', '{y}{k}{o}', '{y}{k}{o}{u}' or
  // '{y}{k}{oo}{n}' and clear test1.  '{y}{k}' is then appended again if
  //   - the last remaining character is in grouping v (declared elsewhere), or
  //   - the remainder is exactly (atlimit) one of the listed strings.
  define step4 as (
    [substring] among (
       '{y}{k}{a}' '{y}{k}{o}' '{y}{k}{o}{u}' '{y}{k}{oo}{n}' (delete unset test1)
    )
    ([] v <- '{y}{k}') or
    [] substring atlimit among (
      '{a}{l}' '{a}{d}' '{e}{n}{d}' '{a}{m}{a}{n}' '{a}{m}{m}{o}{ch}{a}{l}' '{i}{th}' '{a}{n}{i}{th}' '{a}{n}{t}{y}{d}' '{f}{u}{s}' '{v}{r}{oo}{m}' '{g}{e}{r}'
      '{e}{x}{oo}{d}' '{k}{a}{l}{p}' '{k}{a}{l}{l}{y}{n}' '{k}{a}{t}{a}{d}' '{m}{o}{u}{l}' '{m}{p}{a}{n}' '{m}{p}{a}{g}{y}{a}{t}' '{m}{p}{o}{l}' '{m}{p}{o}{s}'
      '{n}{y}{t}' '{x}{y}{k}' '{s}{u}{n}{o}{m}{i}{l}' '{p}{e}{t}{s}' '{p}{y}{t}{s}' '{p}{y}{k}{a}{n}{t}' '{p}{l}{y}{a}{t}{s}' '{p}{o}{s}{t}{e}{l}{n}' '{p}{r}{oo}{t}{o}{d}'
      '{s}{e}{r}{t}' '{s}{u}{n}{a}{d}' '{t}{s}{a}{m}' '{u}{p}{o}{d}' '{f}{y}{l}{o}{n}' '{f}{u}{l}{o}{d}' '{ch}{a}{s}'
      (<- '{y}{k}')
    )
  )

  // step5a: handle words ending in '...{a}{m}{e}'.
  //   1. If the whole word is '{a}{g}{a}{m}{e}', rewrite it to '{a}{g}{a}{m}'.
  //      The literal is bracketed with [ ] so that `<-` replaces exactly the
  //      text just matched; without the brackets `<-` would act on whatever
  //      slice (bra/ket) an earlier command happened to leave behind.
  //   2. Optionally strip one of the longer '...{a}{m}{e}' endings and clear
  //      test1.
  //   3. The word must then end in '{a}{m}{e}' (otherwise the routine signals
  //      failure here, keeping any change made above); that ending is
  //      removed and test1 is cleared.
  //   4. If the remainder is exactly (atlimit) one of the listed strings,
  //      append '{a}{m}' again.
  define step5a as (
    do (['{a}{g}{a}{m}{e}'] atlimit <- '{a}{g}{a}{m}')
    do (
      [substring] among (
        '{a}{g}{a}{m}{e}' '{i}{s}{a}{m}{e}' '{o}{u}{s}{a}{m}{e}' '{i}{k}{a}{m}{e}' '{i}{th}{i}{k}{a}{m}{e}' (delete unset test1)
      )
    )
    ['{a}{m}{e}']
    delete
    unset test1
    [] substring atlimit among (
      '{a}{n}{a}{p}' '{a}{p}{o}{th}' '{a}{p}{o}{k}' '{a}{p}{o}{s}{t}' '{v}{o}{u}{v}' '{x}{e}{th}' '{o}{u}{l}' '{p}{e}{th}' '{p}{y}{k}{r}' '{p}{o}{t}' '{s}{y}{ch}' '{ch}'
      (<- '{a}{m}')
    )
  )

  // step5b: handle words ending in '...{a}{n}{e}'.
  //   1. Optionally strip one of the longer '...{a}{n}{e}' endings and clear
  //      test1; if the remainder is then exactly (atlimit) '{t}{r}' or
  //      '{t}{s}', append '{a}{g}{a}{n}'.
  //   2. The word must then end in '{a}{n}{e}' (otherwise the routine signals
  //      failure here, keeping any change made in 1); that ending is removed
  //      and test1 is cleared.
  //   3. '{a}{n}' is appended again if the last remaining character is in
  //      grouping v2 (declared elsewhere), or else if the remainder is
  //      exactly (atlimit) one of the listed strings.
  define step5b as (
    do (
      [substring] among (
        '{a}{g}{a}{n}{e}' '{i}{s}{a}{n}{e}' '{o}{u}{s}{a}{n}{e}' '{y}{o}{n}{t}{a}{n}{e}' '{y}{o}{t}{a}{n}{e}' '{y}{o}{u}{n}{t}{a}{n}{e}' '{o}{n}{t}{a}{n}{e}' '{o}{t}{a}{n}{e}'
        '{o}{u}{n}{t}{a}{n}{e}' '{i}{k}{a}{n}{e}' '{i}{th}{i}{k}{a}{n}{e}' (
          delete
          unset test1
          [] substring atlimit among (
            '{t}{r}' '{t}{s}' (<- '{a}{g}{a}{n}')
          )
        )
      )
    )
    ['{a}{n}{e}']
    delete
    unset test1
    ([] v2 <- '{a}{n}') or
    [] substring atlimit among (
      '{v}{e}{t}{e}{r}' '{v}{o}{u}{l}{k}' '{v}{r}{a}{ch}{m}' '{g}' '{d}{r}{a}{d}{o}{u}{m}'
      '{th}' '{k}{a}{l}{p}{o}{u}{z}' '{k}{a}{s}{t}{e}{l}' '{k}{o}{r}{m}{o}{r}' '{l}{a}{o}{p}{l}' '{m}{oo}{a}{m}{e}{th}'
      '{m}' '{m}{o}{u}{s}{o}{u}{l}{m}' '{n}' '{o}{u}{l}' '{p}' '{p}{e}{l}{e}{k}' '{p}{l}' '{p}{o}{l}{y}{s}'
      '{p}{o}{r}{t}{o}{l}' '{s}{a}{r}{a}{k}{a}{t}{s}' '{s}{o}{u}{l}{t}' '{t}{s}{a}{r}{l}{a}{t}' '{o}{r}{f}'
      '{t}{s}{y}{g}{g}' '{t}{s}{o}{p}' '{f}{oo}{t}{o}{s}{t}{e}{f}' '{ch}' '{ps}{u}{ch}{o}{p}{l}' '{a}{g}'
      '{g}{a}{l}' '{g}{e}{r}' '{d}{e}{k}' '{d}{y}{p}{l}' '{a}{m}{e}{r}{y}{k}{a}{n}' '{o}{u}{r}' '{p}{y}{th}'
      '{p}{o}{u}{r}{y}{t}' '{s}' '{z}{oo}{n}{t}' '{y}{k}' '{k}{a}{s}{t}' '{k}{o}{p}' '{l}{y}{ch}'
      '{l}{o}{u}{th}{i}{r}' '{m}{a}{y}{n}{t}' '{m}{e}{l}' '{s}{y}{g}' '{s}{p}' '{s}{t}{e}{g}' '{t}{r}{a}{g}'
      '{t}{s}{a}{g}' '{f}' '{e}{r}' '{a}{d}{a}{p}' '{a}{th}{y}{g}{g}' '{a}{m}{i}{ch}' '{a}{n}{y}{k}'
      '{a}{n}{o}{r}{g}' '{a}{p}{i}{g}' '{a}{p}{y}{th}' '{a}{t}{s}{y}{g}{g}' '{v}{a}{s}' '{v}{a}{s}{k}'
      '{v}{a}{th}{u}{g}{a}{l}' '{v}{y}{o}{m}{i}{ch}' '{v}{r}{a}{ch}{u}{k}' '{d}{y}{a}{t}' '{d}{y}{a}{f}' '{e}{n}{o}{r}{g}'
      '{th}{u}{s}' '{k}{a}{p}{n}{o}{v}{y}{o}{m}{i}{ch}' '{k}{a}{t}{a}{g}{a}{l}' '{k}{l}{y}{v}' '{k}{o}{y}{l}{a}{r}{f}'
      '{l}{y}{v}' '{m}{e}{g}{l}{o}{v}{y}{o}{m}{i}{ch}' '{m}{y}{k}{r}{o}{v}{y}{o}{m}{i}{ch}' '{n}{t}{a}{v}'
      '{x}{i}{r}{o}{k}{l}{y}{v}' '{o}{l}{y}{g}{o}{d}{a}{m}' '{o}{l}{o}{g}{a}{l}' '{p}{e}{n}{t}{a}{r}{f}' '{p}{e}{r}{i}{f}'
      '{p}{e}{r}{y}{t}{r}' '{p}{l}{a}{t}' '{p}{o}{l}{u}{d}{a}{p}' '{p}{o}{l}{u}{m}{i}{ch}' '{s}{t}{e}{f}' '{t}{a}{v}'
      '{t}{e}{t}' '{u}{p}{e}{r}{i}{f}' '{u}{p}{o}{k}{o}{p}' '{ch}{a}{m}{i}{l}{o}{d}{a}{p}' '{ps}{i}{l}{o}{t}{a}{v}'
      (<- '{a}{n}')
    )
  )

  // step5c: handle words ending in '...{e}{t}{e}'.
  //   1. Optionally strip a trailing '{i}{s}{e}{t}{e}' and clear test1.
  //   2. The word must then end in '{e}{t}{e}' (otherwise the routine signals
  //      failure here, keeping any change made in 1); that ending is removed
  //      and test1 is cleared.
  //   3. '{e}{t}' is appended again by the first alternative that succeeds:
  //      the last remaining character is in grouping v2 (declared elsewhere);
  //      or the remainder ends with one of the second list; or the remainder
  //      is exactly (atlimit) one of the third list.
  define step5c as (
    do (
      [substring] among (
        '{i}{s}{e}{t}{e}' (delete unset test1)
      )
    )
    ['{e}{t}{e}']
    delete
    unset test1
    ([] v2 <- '{e}{t}') or
    ([] substring among (
      '{o}{d}' '{a}{y}{r}' '{f}{o}{r}' '{t}{a}{th}' '{d}{y}{a}{th}' '{s}{ch}' '{e}{n}{d}' '{e}{u}{r}' '{t}{y}{th}' '{u}{p}{e}{r}{th}'
      '{r}{a}{th}' '{e}{n}{th}' '{r}{o}{th}' '{s}{th}' '{p}{u}{r}' '{a}{y}{n}' '{s}{u}{n}{d}' '{s}{u}{n}' '{s}{u}{n}{th}' '{ch}{oo}{r}'
      '{p}{o}{n}' '{v}{r}' '{k}{a}{th}' '{e}{u}{th}' '{e}{k}{th}' '{n}{e}{t}' '{r}{o}{n}' '{a}{r}{k}' '{v}{a}{r}' '{v}{o}{l}' '{oo}{f}{e}{l}'
      (<- '{e}{t}')
    )) or
    [] substring atlimit among (
      '{a}{v}{a}{r}' '{v}{e}{n}' '{e}{n}{a}{r}' '{a}{v}{r}' '{a}{d}' '{a}{th}' '{a}{n}' '{a}{p}{l}' '{v}{a}{r}{o}{n}' '{n}{t}{r}' '{s}{k}' '{k}{o}{p}'
      '{m}{p}{o}{r}' '{n}{y}{f}' '{p}{a}{g}' '{p}{a}{r}{a}{k}{a}{l}' '{s}{e}{r}{p}' '{s}{k}{e}{l}' '{s}{u}{r}{f}' '{t}{o}{k}' '{u}' '{d}' '{e}{m}'
      '{th}{a}{r}{r}' '{th}'
      (<- '{e}{t}')
    )
  )

  // step5d: drop a trailing '{o}{n}{t}{a}{s}' or '{oo}{n}{t}{a}{s}' and clear
  // test1.
  define step5d as (
    [substring] among (
      '{o}{n}{t}{a}{s}'
      '{oo}{n}{t}{a}{s}'
      (
        delete
        unset test1
        // Whole remainder is '{a}{r}{ch}': append '{o}{n}{t}'.
        (
          []
          '{a}{r}{ch}'
          atlimit
          <- '{o}{n}{t}'
        ) or
        // Otherwise, remainder ends with '{k}{r}{e}': append '{oo}{n}{t}'.
        (
          []
          '{k}{r}{e}'
          <- '{oo}{n}{t}'
        )
      )
    )
  )

  // step5e: drop a trailing '{o}{m}{a}{s}{t}{e}' or '{y}{o}{m}{a}{s}{t}{e}'
  // and clear test1.
  define step5e as (
    [substring] among (
      '{o}{m}{a}{s}{t}{e}'
      '{y}{o}{m}{a}{s}{t}{e}'
      (
        delete
        unset test1
        // Whole remainder is '{o}{n}': append '{o}{m}{a}{s}{t}'.
        (
          []
          '{o}{n}'
          atlimit
          <- '{o}{m}{a}{s}{t}'
        )
      )
    )
  )

  // step5f: handle words ending in '...{e}{s}{t}{e}'.
  //   1. Optionally: if the word ends in '{y}{e}{s}{t}{e}', remove that ending
  //      and clear test1; if the remainder is exactly (atlimit) one of the
  //      first list, append '{y}{e}{s}{t}'.
  //   2. The word must then end in '{e}{s}{t}{e}' (otherwise the routine
  //      signals failure here, keeping any change made in 1); that ending is
  //      removed and test1 is cleared.
  //   3. If the remainder is exactly (atlimit) one of the second list, append
  //      '{y}{e}{s}{t}'.
  // NOTE(review): stage 3 re-appends '{y}{e}{s}{t}' although the ending
  // removed in stage 2 is '{e}{s}{t}{e}' -- confirm against the reference
  // algorithm that this is intended rather than '{e}{s}{t}'.
  define step5f as (
    do (
      ['{y}{e}{s}{t}{e}']
      delete
      unset test1
      [] substring atlimit among (
        '{p}' '{a}{p}' '{s}{u}{m}{p}' '{a}{s}{u}{m}{p}' '{a}{k}{a}{t}{a}{p}' '{a}{m}{e}{t}{a}{m}{f}' (<- '{y}{e}{s}{t}')
      )
    )
    ['{e}{s}{t}{e}']
    delete
    unset test1
    [] substring atlimit among (
      '{a}{l}' '{a}{r}' '{e}{k}{t}{e}{l}' '{z}' '{m}' '{x}' '{p}{a}{r}{a}{k}{a}{l}' '{p}{r}{o}' '{n}{y}{s}'
      (<- '{y}{e}{s}{t}')
    )
  )

  // step5g: handle the '{i}{k}{a}' / '{i}{k}{e}{s}' / '{i}{k}{e}' endings.
  //   1. Optionally strip '{i}{th}{i}{k}{a}', '{i}{th}{i}{k}{e}{s}' or
  //      '{i}{th}{i}{k}{e}' and clear test1.
  //   2. Strip '{i}{k}{a}', '{i}{k}{e}{s}' or '{i}{k}{e}' and clear test1;
  //      then append '{i}{k}' again if the remainder ends with one of the
  //      first list, or else if it is exactly (atlimit) one of the second.
  define step5g as (
    do (
      [substring] among (
        '{i}{th}{i}{k}{a}' '{i}{th}{i}{k}{e}{s}' '{i}{th}{i}{k}{e}' (delete unset test1)
      )
    )
    [substring] among (
      '{i}{k}{a}' '{i}{k}{e}{s}' '{i}{k}{e}' (
        delete
        unset test1
        ([] substring among (
           '{s}{k}{oo}{l}' '{s}{k}{o}{u}{l}' '{n}{a}{r}{th}' '{s}{f}' '{o}{th}' '{p}{y}{th}' (<- '{i}{k}')
        )) or
        ([] substring atlimit among (
           '{d}{y}{a}{th}' '{th}' '{p}{a}{r}{a}{k}{a}{t}{a}{th}' '{p}{r}{o}{s}{th}' '{s}{u}{n}{th}' (<- '{i}{k}')
        ))
      )
    )
  )

  // step5h: strip a trailing '{o}{u}{s}{a}', '{o}{u}{s}{e}{s}' or
  // '{o}{u}{s}{e}' and clear test1.  '{o}{u}{s}' is then appended again if
  // the remainder ends with one of the first list, or else if it is exactly
  // (atlimit) one of the second list.
  define step5h as (
    [substring] among (
      '{o}{u}{s}{a}' '{o}{u}{s}{e}{s}' '{o}{u}{s}{e}' (
        delete
        unset test1
        ([] substring among (
          '{p}{o}{d}{a}{r}' '{v}{l}{e}{p}' '{p}{a}{n}{t}{a}{ch}' '{f}{r}{u}{d}' '{m}{a}{n}{t}{y}{l}' '{m}{a}{l}{l}' '{k}{u}{m}{a}{t}' '{l}{a}{ch}' '{l}{i}{g}'
          '{f}{a}{g}' '{o}{m}' '{p}{r}{oo}{t}' (<- '{o}{u}{s}')

        )) or
        ([] substring atlimit among (
          '{f}{a}{r}{m}{a}{k}' '{ch}{a}{d}' '{a}{g}{k}' '{a}{n}{a}{r}{r}' '{v}{r}{o}{m}' '{e}{k}{l}{y}{p}' '{l}{a}{m}{p}{y}{d}' '{l}{e}{ch}' '{m}' '{p}{a}{t}'
          '{r}' '{l}' '{m}{e}{d}' '{m}{e}{s}{a}{z}' '{u}{p}{o}{t}{e}{y}{n}' '{a}{m}' '{a}{y}{th}' '{a}{n}{i}{k}' '{d}{e}{s}{p}{o}{z}'
          '{e}{n}{d}{y}{a}{f}{e}{r}' '{d}{e}' '{d}{e}{u}{t}{e}{r}{e}{u}' '{k}{a}{th}{a}{r}{e}{u}' '{p}{l}{e}' '{t}{s}{a}'
          (<- '{o}{u}{s}')
        ))
      )
    )
  )

  // step5i: strip a trailing '{a}{g}{a}', '{a}{g}{e}{s}' or '{a}{g}{e}' and
  // clear test1.  Then:
  //   - if the remainder ends with '{k}{o}{l}{l}', append '{a}{g}';
  //   - otherwise, provided the remainder does NOT end with '{ps}{o}{f}' or
  //     '{n}{a}{u}{l}{o}{ch}', append '{a}{g}' if the remainder ends with one
  //     of the first list, or else if it is exactly (atlimit) one of the
  //     second list.
  define step5i as (
    [substring] among (
      '{a}{g}{a}' '{a}{g}{e}{s}' '{a}{g}{e}' (
        delete
        unset test1
        ([] '{k}{o}{l}{l}' <- '{a}{g}') or (
          not ([substring] among ('{ps}{o}{f}' '{n}{a}{u}{l}{o}{ch}'))
          ([] substring among (
            '{o}{f}' '{p}{e}{l}' '{ch}{o}{r}{t}' '{l}{l}' '{s}{f}' '{r}{p}' '{f}{r}' '{p}{r}' '{l}{o}{ch}' '{s}{m}{i}{n}'
            (<- '{a}{g}')
          )) or
          ([] substring atlimit among (
            '{a}{v}{a}{s}{t}' '{p}{o}{l}{u}{f}' '{a}{d}{i}{f}' '{p}{a}{m}{f}' '{r}' '{a}{s}{p}' '{a}{f}' '{a}{m}{a}{l}' '{a}{m}{a}{l}{l}{y}'
            '{a}{n}{u}{s}{t}' '{a}{p}{e}{r}' '{a}{s}{p}{a}{r}' '{a}{ch}{a}{r}' '{d}{e}{r}{v}{e}{n}' '{d}{r}{o}{s}{o}{p}' '{x}{e}{f}' '{n}{e}{o}{p}'
            '{n}{o}{m}{o}{t}' '{o}{l}{o}{p}' '{o}{m}{o}{t}' '{p}{r}{o}{s}{t}' '{p}{r}{o}{s}{oo}{p}{o}{p}' '{s}{u}{m}{p}' '{s}{u}{n}{t}' '{t}' '{u}{p}{o}{t}'
            '{ch}{a}{r}' '{a}{e}{y}{p}' '{a}{y}{m}{o}{s}{t}' '{a}{n}{u}{p}' '{a}{p}{o}{t}' '{a}{r}{t}{y}{p}' '{d}{y}{a}{t}' '{e}{n}' '{e}{p}{y}{t}'
            '{k}{r}{o}{k}{a}{l}{o}{p}' '{s}{y}{d}{i}{r}{o}{p}' '{l}' '{n}{a}{u}' '{o}{u}{l}{a}{m}' '{o}{u}{r}' '{p}' '{t}{r}' '{m}'
            (<- '{a}{g}')
          ))
        )
      )
    )
  )

  // step5j: drop a trailing '{i}{s}{e}', '{i}{s}{o}{u}' or '{i}{s}{a}' and
  // clear test1.
  define step5j as (
    [substring] among (
      '{i}{s}{e}'
      '{i}{s}{o}{u}'
      '{i}{s}{a}'
      (
        delete
        unset test1
      )
    )
    // If the whole remaining word is one of these, append '{i}{s}' again.
    [] substring atlimit among (
      '{n}'
      '{ch}{e}{r}{s}{o}{n}'
      '{d}{oo}{d}{e}{k}{a}{n}'
      '{e}{r}{i}{m}{o}{n}'
      '{m}{e}{g}{a}{l}{o}{n}'
      '{e}{p}{t}{a}{n}'
      (<- '{i}{s}')
    )
  )

  // step5k: drop a trailing '{i}{s}{t}{e}' and clear test1.
  define step5k as (
    [substring] among (
      '{i}{s}{t}{e}'
      (
        delete
        unset test1
      )
    )
    // If the whole remaining word is one of these, append '{i}{s}{t}' again.
    [] substring atlimit among (
      '{a}{s}{v}'
      '{s}{v}'
      '{a}{ch}{r}'
      '{ch}{r}'
      '{a}{p}{l}'
      '{a}{e}{y}{m}{n}'
      '{d}{u}{s}{ch}{r}'
      '{e}{u}{ch}{r}'
      '{k}{o}{y}{n}{o}{ch}{r}'
      '{p}{a}{l}{y}{m}{ps}'
      (<- '{i}{s}{t}')
    )
  )

  // step5l: drop a trailing '{o}{u}{n}{e}', '{i}{s}{o}{u}{n}{e}' or
  // '{i}{th}{o}{u}{n}{e}' and clear test1.
  define step5l as (
    [substring] among (
      '{o}{u}{n}{e}'
      '{i}{s}{o}{u}{n}{e}'
      '{i}{th}{o}{u}{n}{e}'
      (
        delete
        unset test1
      )
    )
    // If the whole remaining word is one of these, append '{o}{u}{n}' again.
    [] substring atlimit among (
      '{n}'
      '{r}'
      '{s}{p}{y}'
      '{s}{t}{r}{a}{v}{o}{m}{o}{u}{t}{s}'
      '{k}{a}{k}{o}{m}{o}{u}{t}{s}'
      '{e}{x}{oo}{n}'
      (<- '{o}{u}{n}')
    )
  )

  // step5m: drop a trailing '{o}{u}{m}{e}', '{i}{s}{o}{u}{m}{e}' or
  // '{i}{th}{o}{u}{m}{e}' and clear test1.
  define step5m as (
    [substring] among (
      '{o}{u}{m}{e}'
      '{i}{s}{o}{u}{m}{e}'
      '{i}{th}{o}{u}{m}{e}'
      (
        delete
        unset test1
      )
    )
    // If the whole remaining word is one of these, append '{o}{u}{m}' again.
    [] substring atlimit among (
      '{p}{a}{r}{a}{s}{o}{u}{s}'
      '{f}'
      '{ch}'
      '{oo}{r}{y}{o}{p}{l}'
      '{a}{z}'
      '{a}{l}{l}{o}{s}{o}{u}{s}'
      '{a}{s}{o}{u}{s}'
      (<- '{o}{u}{m}')
    )
  )

  // step6: generic ending removal.
  //   1. Optionally rewrite a trailing '{m}{a}{t}{a}', '{m}{a}{t}{oo}{n}' or
  //      '{m}{a}{t}{o}{s}' to '{m}{a}'.
  //   2. Continue only if the boolean test1 is still set (the `stem` routine
  //      sets it up front and the earlier steps clear it when they remove a
  //      suffix).
  //   3. Delete the longest ending from the list below that matches.
  define step6 as (
    do (
      [substring] among (
        '{m}{a}{t}{a}' '{m}{a}{t}{oo}{n}' '{m}{a}{t}{o}{s}' (<- '{m}{a}')
      )
    )
    test1
    [substring] among (
      '{a}' '{a}{g}{a}{t}{e}' '{a}{g}{a}{n}' '{a}{e}{y}' '{a}{m}{a}{y}' '{a}{n}' '{a}{s}' '{a}{s}{a}{y}' '{a}{t}{a}{y}' '{a}{oo}' '{e}' '{e}{y}'
      '{e}{y}{s}' '{e}{y}{t}{e}' '{e}{s}{a}{y}' '{e}{s}' '{e}{t}{a}{y}' '{y}' '{y}{e}{m}{a}{y}' '{y}{e}{m}{a}{s}{t}{e}' '{y}{e}{t}{a}{y}' '{y}{e}{s}{a}{y}'
      '{y}{e}{s}{a}{s}{t}{e}' '{y}{o}{m}{a}{s}{t}{a}{n}' '{y}{o}{m}{o}{u}{n}' '{y}{o}{m}{o}{u}{n}{a}' '{y}{o}{n}{t}{a}{n}' '{y}{o}{n}{t}{o}{u}{s}{a}{n}' '{y}{o}{s}{a}{s}{t}{a}{n}'
      '{y}{o}{s}{a}{s}{t}{e}' '{y}{o}{s}{o}{u}{n}' '{y}{o}{s}{o}{u}{n}{a}' '{y}{o}{t}{a}{n}' '{y}{o}{u}{m}{a}' '{y}{o}{u}{m}{a}{s}{t}{e}' '{y}{o}{u}{n}{t}{a}{y}'
      '{y}{o}{u}{n}{t}{a}{n}' '{i}' '{i}{d}{e}{s}' '{i}{d}{oo}{n}' '{i}{th}{e}{y}' '{i}{th}{e}{y}{s}' '{i}{th}{e}{y}{t}{e}' '{i}{th}{i}{k}{a}{t}{e}' '{i}{th}{i}{k}{a}{n}'
      '{i}{th}{o}{u}{n}' '{i}{th}{oo}' '{i}{k}{a}{t}{e}' '{i}{k}{a}{n}' '{i}{s}' '{i}{s}{a}{n}' '{i}{s}{a}{t}{e}' '{i}{s}{e}{y}' '{i}{s}{e}{s}' '{i}{s}{o}{u}{n}'
      '{i}{s}{oo}' '{o}' '{o}{y}' '{o}{m}{a}{y}' '{o}{m}{a}{s}{t}{a}{n}' '{o}{m}{o}{u}{n}' '{o}{m}{o}{u}{n}{a}' '{o}{n}{t}{a}{y}' '{o}{n}{t}{a}{n}'
      '{o}{n}{t}{o}{u}{s}{a}{n}' '{o}{s}' '{o}{s}{a}{s}{t}{a}{n}' '{o}{s}{a}{s}{t}{e}' '{o}{s}{o}{u}{n}' '{o}{s}{o}{u}{n}{a}' '{o}{t}{a}{n}' '{o}{u}' '{o}{u}{m}{a}{y}'
      '{o}{u}{m}{a}{s}{t}{e}' '{o}{u}{n}' '{o}{u}{n}{t}{a}{y}' '{o}{u}{n}{t}{a}{n}' '{o}{u}{s}' '{o}{u}{s}{a}{n}' '{o}{u}{s}{a}{t}{e}' '{u}' '{u}{s}' '{oo}'
      '{oo}{n}' (delete)
    )
  )

  // step7: delete a trailing '{e}{s}{t}', '{o}{t}', '{u}{t}' or '{oo}{t}'
  // followed by '{e}{r}' or '{a}{t}'.
  define step7 as (
    [substring] among (
      '{e}{s}{t}{e}{r}'
      '{e}{s}{t}{a}{t}'
      '{o}{t}{e}{r}'
      '{o}{t}{a}{t}'
      '{u}{t}{e}{r}'
      '{u}{t}{a}{t}'
      '{oo}{t}{e}{r}'
      '{oo}{t}{a}{t}'
      (delete)
    )
  )
 )

 // stem: entry point of the stemmer.  All work happens in backward mode,
 // i.e. scanning from the end of the word.
 //   - `do tolower` runs the tolower routine (defined earlier in this file);
 //     being wrapped in `do`, its failure does not abort stemming.
 //   - `has_min_length` is NOT wrapped in `do`: if it fails, the remaining
 //     steps are skipped.
 //   - test1 is set here; the suffix steps clear it when they remove
 //     something, and step6 only strips its generic endings while it is
 //     still set.
 //   - every step is wrapped in `do`, so each is attempted in the order
 //     listed regardless of whether the previous ones succeeded.
 // NOTE(review): step5j is invoked before step5i, out of alphabetical
 // sequence -- confirm this ordering is intentional before "tidying" it.
 define stem as (
    backwards (
      do tolower
      has_min_length
      set test1
      do step1
      do steps1
      do steps2
      do steps3
      do steps4
      do steps5
      do steps6
      do steps7
      do steps8
      do steps9
      do steps10
      do step2a
      do step2b
      do step2c
      do step2d
      do step3
      do step4
      do step5a
      do step5b
      do step5c
      do step5d
      do step5e
      do step5f
      do step5g
      do step5h
      do step5j
      do step5i
      do step5k
      do step5l
      do step5m
      do step6
      do step7
    )
 )
--- a/contrib/snowball/algorithms/hindi.sbl
+++ b/contrib/snowball/algorithms/hindi.sbl
@@ -0,0 +1,323 @@
 // An implementation of "A Lightweight Stemmer for Hindi":
 // http://www.kbcs.in/downloads/papers/StmmerHindi.pdf

 externals ( stem )

 stringescapes {}

 // The transliteration scheme used for our stringdefs matches that used in the
 // paper, as documented in the appendix.  It appears to match the WX notation
 // (https://en.wikipedia.org/wiki/WX_notation) except that WX apparently
 // uses 'z' for Anunasika whereas the paper uses Mh.
 //
 // We discriminate dependent vowels by adding a leading "_" to their stringdef
 // names (mnemonic: the _ signifies removing the implicit a from the preceding
 // character).

 // Vowels and sonorants:
 stringdef a  '{U+0905}'
 stringdef A  '{U+0906}'
 stringdef i  '{U+0907}'
 stringdef I  '{U+0908}'
 stringdef u  '{U+0909}'
 stringdef U  '{U+090A}'
 stringdef q  '{U+090B}'
 stringdef e  '{U+090F}'
 stringdef E  '{U+0910}'
 stringdef o  '{U+0913}'
 stringdef O  '{U+0914}'

 // Vowel signs:
 stringdef _A '{U+093E}'
 stringdef _i '{U+093F}'
 stringdef _I '{U+0940}'
 stringdef _u '{U+0941}'
 stringdef _U '{U+0942}'
 stringdef _q '{U+0943}'
 stringdef _e '{U+0947}'
 stringdef _E '{U+0948}'
 stringdef _o '{U+094B}'
 stringdef _O '{U+094C}'

 // Diacritics:
 stringdef M  '{U+0902}'
 stringdef H  '{U+0903}'
 stringdef Mh '{U+0901}'
 stringdef Z  '{U+093C}' // Nukta
 stringdef virama '{U+094D}'

 // Velar consonants:
 stringdef k  '{U+0915}'
 stringdef K  '{U+0916}'
 stringdef g  '{U+0917}'
 stringdef G  '{U+0918}'
 stringdef f  '{U+0919}'

 // Palatal consonants:
 stringdef c  '{U+091A}'
 stringdef C  '{U+091B}'
 stringdef j  '{U+091C}'
 stringdef J  '{U+091D}'
 stringdef F  '{U+091E}'

 // Retroflex consonants:
 stringdef t  '{U+091F}'
 stringdef T  '{U+0920}'
 stringdef d  '{U+0921}'
 stringdef D  '{U+0922}'
 stringdef N  '{U+0923}'

 // Dental consonants:
 stringdef w  '{U+0924}'
 stringdef W  '{U+0925}'
 stringdef x  '{U+0926}'
 stringdef X  '{U+0927}'
 stringdef n  '{U+0928}'

 // Labial consonants:
 stringdef p  '{U+092A}'
 stringdef P  '{U+092B}'
 stringdef b  '{U+092C}'
 stringdef B  '{U+092D}'
 stringdef m  '{U+092E}'

 // Semi-vowels:
 stringdef y  '{U+092F}'
 stringdef r  '{U+0930}'
 stringdef l  '{U+0932}'
 stringdef v  '{U+0935}'

 // Fricatives:
 stringdef S  '{U+0936}'
 stringdef R  '{U+0937}'
 stringdef s  '{U+0938}'
 stringdef h  '{U+0939}'

 stringdef lY '{U+0933}'

 // Precomposed characters - letters + nukta:
 stringdef nZ '{U+0929}' // ≡ {n}{Z}
 stringdef rZ '{U+0931}' // ≡ {r}{Z}
 stringdef lYZ '{U+0934}' // ≡ {lY}{Z}
 stringdef kZ '{U+0958}' // ≡ {k}{Z}
 stringdef KZ '{U+0959}' // ≡ {K}{Z}
 stringdef gZ '{U+095A}' // ≡ {g}{Z}
 stringdef jZ '{U+095B}' // ≡ {j}{Z}
 stringdef dZ '{U+095C}' // ≡ {d}{Z}
 stringdef DZ '{U+095D}' // ≡ {D}{Z}
 stringdef PZ '{U+095E}' // ≡ {P}{Z}
 stringdef yZ '{U+095F}' // ≡ {y}{Z}

 integers ( p )

 groupings ( consonant )

 routines ( CONSONANT )

 // Grouping of every character treated as a consonant by the stemmer: the
 // consonant letters declared above (velar, palatal, retroflex, dental,
 // labial, semi-vowels, fricatives, {lY}), the nukta sign itself, and the
 // precomposed letter+nukta code points.
 define consonant '{k}{K}{g}{G}{f}' +
                 '{c}{C}{j}{J}{F}' +
                 '{t}{T}{d}{D}{N}' +
                 '{w}{W}{x}{X}{n}' +
                 '{p}{P}{b}{B}{m}' +
                 '{y}{r}{l}{v}' +
                 '{S}{R}{s}{h}' +
                 '{lY}' +
                 '{Z}' + // Nukta
                 // Precomposed characters - letter and nukta:
                 '{nZ}{rZ}{lYZ}{kZ}{KZ}{gZ}{jZ}{dZ}{DZ}{PZ}{yZ}'

 // Backward-mode routine that succeeds when the character before the cursor
 // is in the consonant grouping.  It is attached as a condition to the
 // "leading implicit a" suffixes in the among list of `stem`.
 backwardmode ( define CONSONANT as ( consonant ) )

 // stem: remove at most one suffix from the word.
 //   1. `test ( next setmark p )` records in p the position just after the
 //      first character, without moving the cursor.
 //   2. In backward mode the suffix search is limited to the text after p,
 //      so the first character can never be part of the removed suffix.
 //   3. The longest matching entry of the among list is selected (entries
 //      followed by CONSONANT additionally require a consonant immediately
 //      before the suffix) and then deleted.
 define stem as (
    test ( next setmark p )
    backwards (
        // We assume in this implementation that the whole word doesn't count
        // as a valid suffix to remove, so we remove the longest suffix from
        // the list which leaves at least one character.  This change affects
        // 47 words out of the 65,140 in the sample vocabulary from Hindi
        // wikipedia.
        setlimit tomark p for ([substring])
        among (
            // The list below is derived from figure 3 in the paper.
            //
            // We perform the stemming on the Devanagari characters rather than
            // transliterating to Latin, so we have adapted the list below to
            // reflect this by converting suffixes back to Devanagari as
            // follows:
            //
            // * within the suffixes, "a" after a consonant is dropped since
            //   consonants have an implicit "a".
            //
            // * within the suffixes, a vowel other than "a" after a consonant
            //   is a dependent vowel (vowel sign); a vowel (including "a")
            //   after a non-consonant is an independent vowel.
            //
            // * to allow the vowel at the start of each suffix being dependent
            //   or independent, we include each suffix twice.  For the
            //   dependent version, a leading "a" is dropped and we check that
            //   the suffix is preceded by a consonant (which will have an
            //   implicit "a").
            //
            // * we add '{a}', which is needed for the example given right at
            //   the end of section 5 to work (conflating BarawIya and
            //   BarawIyawA), and which 3.1 a.v strongly suggests should be in
            //   the list:
            //
            //     Thus, the following suffix deletions (longest possible
            //     match) are required to reduce inflected forms of masculine
            //     nouns to a common stem:
            //     a A i [...]
            //
            //   Adding '{a}' only affects 2 words out of the 65,140 in the
            //   sample vocabulary.
            //
            // * The transliterations of our stems would end with "a" when our
            //   stems end in a consonant, so we also include {virama} in the
            //   list of suffixes to remove (this affects 222 words from the
            //   sample vocabulary).
            //
            // We've also assumed that Mh in the suffix list always means {Mh}
            // and never {M}{h}{virama}.  Only one of the 65,140 words in the
            // sample vocabulary stems differently due to this (and that word
            // seems to be a typo).

            '{virama}'

            '{a}'
            '{A}'
            '{i}'
            '{I}'
            '{u}'
            '{U}'
            '{e}'
            '{o}'
            '{e}{M}'
            '{o}{M}'
            '{A}{M}'
            '{u}{A}{M}'
            '{u}{e}{M}'
            '{u}{o}{M}'
            '{A}{e}{M}'
            '{A}{o}{M}'
            '{i}{y}{_A}{M}'
            '{i}{y}{_o}{M}'
            '{A}{i}{y}{_A}{M}'
            '{A}{i}{y}{_o}{M}'
            '{A}{Mh}'
            '{i}{y}{_A}{Mh}'
            '{A}{i}{y}{_A}{Mh}'
            '{a}{w}{_A}{e}{M}'
            '{a}{w}{_A}{o}{M}'
            '{a}{n}{_A}{e}{M}'
            '{a}{n}{_A}{o}{M}'
            '{a}{w}{_A}'
            '{a}{w}{_I}'
            '{I}{M}'
            '{a}{w}{_I}{M}'
            '{a}{w}{_e}'
            '{A}{w}{_A}'
            '{A}{w}{_I}'
            '{A}{w}{_I}{M}'
            '{A}{w}{_e}'
            '{a}{n}{_A}'
            '{a}{n}{_I}'
            '{a}{n}{_e}'
            '{A}{n}{_A}'
            '{A}{n}{_e}'
            '{U}{M}{g}{_A}'
            '{U}{M}{g}{_I}'
            '{A}{U}{M}{g}{_A}'
            '{A}{U}{M}{g}{_I}'
            '{e}{M}{g}{_e}'
            '{e}{M}{g}{_I}'
            '{A}{e}{M}{g}{_e}'
            '{A}{e}{M}{g}{_I}'
            '{o}{g}{_e}'
            '{o}{g}{_I}'
            '{A}{o}{g}{_e}'
            '{A}{o}{g}{_I}'
            '{e}{g}{_A}'
            '{e}{g}{_I}'
            '{A}{e}{g}{_A}'
            '{A}{e}{g}{_I}'
            '{A}{y}{_A}'
            '{A}{e}'
            '{A}{I}'
            '{A}{I}{M}'
            '{i}{e}'
            '{A}{o}'
            '{A}{i}{e}'
            '{a}{k}{r}'
            '{A}{k}{r}'

            '{_A}'
            '{_i}'
            '{_I}'
            '{_u}'
            '{_U}'
            '{_e}'
            '{_o}'
            '{_e}{M}'
            '{_o}{M}'
            '{_A}{M}'
            '{_u}{A}{M}'
            '{_u}{e}{M}'
            '{_u}{o}{M}'
            '{_A}{e}{M}'
            '{_A}{o}{M}'
            '{_i}{y}{_A}{M}'
            '{_i}{y}{_o}{M}'
            '{_A}{i}{y}{_A}{M}'
            '{_A}{i}{y}{_o}{M}'
            '{_A}{Mh}'
            '{_i}{y}{_A}{Mh}'
            '{_A}{i}{y}{_A}{Mh}'
            '{_I}{M}'
            '{_A}{w}{_A}'
            '{_A}{w}{_I}'
            '{_A}{w}{_I}{M}'
            '{_A}{w}{_e}'
            '{_A}{n}{_A}'
            '{_A}{n}{_e}'
            '{_U}{M}{g}{_A}'
            '{_U}{M}{g}{_I}'
            '{_A}{U}{M}{g}{_A}'
            '{_A}{U}{M}{g}{_I}'
            '{_e}{M}{g}{_e}'
            '{_e}{M}{g}{_I}'
            '{_A}{e}{M}{g}{_e}'
            '{_A}{e}{M}{g}{_I}'
            '{_o}{g}{_e}'
            '{_o}{g}{_I}'
            '{_A}{o}{g}{_e}'
            '{_A}{o}{g}{_I}'
            '{_e}{g}{_A}'
            '{_e}{g}{_I}'
            '{_A}{e}{g}{_A}'
            '{_A}{e}{g}{_I}'
            '{_A}{y}{_A}'
            '{_A}{e}'
            '{_A}{I}'
            '{_A}{I}{M}'
            '{_i}{e}'
            '{_A}{o}'
            '{_A}{i}{e}'
            '{_A}{k}{r}'

            /* Suffixes with a leading implicit a: */
            '{w}{_A}{e}{M}' CONSONANT
            '{w}{_A}{o}{M}' CONSONANT
            '{n}{_A}{e}{M}' CONSONANT
            '{n}{_A}{o}{M}' CONSONANT
            '{w}{_A}' CONSONANT
            '{w}{_I}' CONSONANT
            '{w}{_I}{M}' CONSONANT
            '{w}{_e}' CONSONANT
            '{n}{_A}' CONSONANT
            '{n}{_I}' CONSONANT
            '{n}{_e}' CONSONANT
            '{k}{r}' CONSONANT
        )
        delete
    )
 )
--- a/contrib/snowball/algorithms/hungarian/stem_Unicode.sbl
+++ b/contrib/snowball/algorithms/hungarian/stem_Unicode.sbl
@@ -27,17 +27,17 @@ groupings ( v )

 stringescapes {}

 /* special characters (in Unicode) */

 stringdef a'  hex 'E1'  //a-acute
 stringdef e'  hex 'E9'  //e-acute
 stringdef i'  hex 'ED'  //i-acute
 stringdef o'  hex 'F3'  //o-acute
 stringdef o"  hex 'F6'  //o-umlaut
 stringdef oq  hex '151' //o-double acute
 stringdef u'  hex 'FA'  //u-acute
 stringdef u"  hex 'FC'  //u-umlaut
 stringdef uq  hex '171' //u-double acute
 /* special characters */

 stringdef a'  '{U+00E1}'  //a-acute
 stringdef e'  '{U+00E9}'  //e-acute
 stringdef i'  '{U+00ED}'  //i-acute
 stringdef o'  '{U+00F3}'  //o-acute
 stringdef o"  '{U+00F6}'  //o-umlaut
 stringdef oq  '{U+0151}' //o-double acute
 stringdef u'  '{U+00FA}'  //u-acute
 stringdef u"  '{U+00FC}'  //u-umlaut
 stringdef uq  '{U+0171}' //u-double acute

 define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'

--- a/contrib/snowball/algorithms/hungarian/stem_ISO_8859_2.sbl
+++ b/contrib/snowball/algorithms/hungarian/stem_ISO_8859_2.sbl
@@ -1,241 +0,0 @@
 /*
 Hungarian Stemmer
 Removes noun inflections
 */

 routines (
    mark_regions
    R1
    v_ending
    case
    case_special
    case_other
    plural
    owned
    sing_owner
    plur_owner
    instrum
    factive
    undouble
    double
 )

 externals ( stem )

 integers ( p1 )
 groupings ( v )

 stringescapes {}

 /* special characters (in ISO Latin 2) */

 stringdef a'  hex 'E1'  //a-acute
 stringdef e'  hex 'E9'  //e-acute
 stringdef i'  hex 'ED'  //i-acute
 stringdef o'  hex 'F3'  //o-acute
 stringdef o"  hex 'F6'  //o-umlaut
 stringdef oq  hex 'F5'  //o-double acute
 stringdef u'  hex 'FA'  //u-acute
 stringdef u"  hex 'FC'  //u-umlaut
 stringdef uq  hex 'FB'  //u-double acute

 define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'

 // mark_regions: compute p1, the start of region R1.
 // p1 defaults to the end of the word (limit).  If the word starts with a
 // vowel, the cursor moves to the first following non-vowel, skips either
 // one of the listed multi-letter consonants ('cs', 'gy', ..., 'dzs') or a
 // single character, and p1 is set there.  Otherwise, if the word starts
 // with a non-vowel, p1 is set just past the first vowel.
 define mark_regions as (

    $p1 = limit

    (v goto non-v
     among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
     setmark p1)
    or

    (non-v gopast v setmark p1)
 )

 backwardmode (

    // R1: succeeds when the cursor is at or after p1, i.e. the suffix just
    // matched (backward mode) lies entirely inside region R1.
    define R1 as $p1 <= cursor

    // v_ending: if the word ends, within R1, in an acute-accented a or e,
    // replace it by the plain vowel ('a' / 'e').
    define v_ending as (
        [substring] R1 among(
            '{a'}' (<- 'a')
            '{e'}' (<- 'e')
        )
    )

    // double: succeeds, without moving the cursor (`test`), when the text
    // before the cursor ends with one of the listed doubled consonants
    // (including doubled forms of two-letter consonants such as 'ccs').
    define double as (
        test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
        'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
    )

    // undouble: step back over one character (`next`), then delete the single
    // character before it (`[hop 1]`) -- i.e. remove the second-to-last
    // character relative to the cursor position on entry.
    define undouble as (
        next [hop 1] delete
    )

    // instrum: if the word ends, within R1, in 'al' or 'el' and that ending
    // is preceded by a doubled consonant (`double`), delete the ending and
    // then collapse the doubled consonant with `undouble`.
    define instrum as(
        [substring] R1 among(
            'al' (double)
            'el' (double)
        )
        delete
        undouble
    )


    // case: delete the longest of the listed endings if it lies within R1,
    // then run v_ending on what remains (which must succeed for this routine
    // to succeed; the deletion itself is kept either way because `stem`
    // calls this routine via `do`).
    define case as (
        [substring] R1 among(
            'ban' 'ben'
            'ba' 'be'
            'ra' 're'
            'nak' 'nek'
            'val' 'vel'
            't{o'}l' 't{oq}l'
            'r{o'}l' 'r{oq}l'
            'b{o'}l' 'b{oq}l'
            'hoz' 'hez' 'h{o"}z'
            'n{a'}l' 'n{e'}l'
            'ig'
            'at' 'et' 'ot' '{o"}t'
            '{e'}rt'
            'k{e'}pp' 'k{e'}ppen'
            'kor'
            'ul' '{u"}l'
            'v{a'}' 'v{e'}'
            'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
            'k{e'}nt'
            'en' 'on' 'an' '{o"}n'
            'n'
            't'
        )
        delete
        v_ending
    )

    // case_special: within R1, replace the endings '{e'}n', '{a'}n' and
    // '{a'}nk{e'}nt' by the plain vowel shown ('e' or 'a').
    define case_special as(
        [substring] R1 among(
            '{e'}n' (<- 'e')
            '{a'}n' (<- 'a')
            '{a'}nk{e'}nt' (<- 'a')
        )
    )

    // case_other: within R1, handle the '-stul' / '-st{u"}l' endings: the
    // forms with an unaccented or no leading vowel are deleted, the forms
    // with a leading accented vowel are replaced by the plain vowel.
    define case_other as(
        [substring] R1 among(
            'astul' 'est{u"}l' (delete)
            'stul' 'st{u"}l' (delete)
            '{a'}stul' (<- 'a')
            '{e'}st{u"}l' (<- 'e')
        )
    )

    // factive: if the word ends, within R1, in an acute-accented a or e that
    // is preceded by a doubled consonant (`double`), delete the vowel and
    // then collapse the doubled consonant with `undouble`.
    define factive as(
        [substring] R1 among(
            '{a'}' (double)
            '{e'}' (double)
        )
        delete
        undouble
    )

    // plural: within R1, handle endings in 'k': an accented vowel + 'k' is
    // replaced by the plain vowel; the other listed forms (optional vowel +
    // 'k') are deleted.
    define plural as (
        [substring] R1 among(
            '{a'}k' (<- 'a')
            '{e'}k' (<- 'e')
            '{o"}k' (delete)
            'ak' (delete)
            'ok' (delete)
            'ek' (delete)
            'k' (delete)
        )
    )

    // owned: within R1, handle the endings built on '{e'}' / 'k{e'}' /
    // '{e'}i' listed below.  Forms that begin with an accented vowel are
    // replaced by the corresponding plain vowel; the rest are deleted.
    define owned as (
        [substring] R1 among (
            'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
            '{e'}k{e'}' (<- 'e')
            '{a'}k{e'}' (<- 'a')
            'k{e'}' (delete)
            '{e'}{e'}i' (<- 'e')
            '{a'}{e'}i' (<- 'a')
            '{e'}i'  (delete)
            '{e'}{e'}' (<- 'e')
            '{e'}' (delete)
        )
    )

    // sing_owner: within R1, handle the listed endings in 'nk', 'uk'/'{u"}k',
    // 'm', 'd', 'ja'/'je' and bare vowels.  Forms that begin with an
    // acute-accented a/e are replaced by the plain vowel; the rest are
    // deleted.
    define sing_owner as (
        [substring] R1 among(
            '{u"}nk' 'unk' (delete)
            '{a'}nk' (<- 'a')
            '{e'}nk' (<- 'e')
            'nk' (delete)
            '{a'}juk' (<- 'a')
            '{e'}j{u"}k' (<- 'e')
            'juk' 'j{u"}k' (delete)
            'uk' '{u"}k' (delete)
            'em' 'om' 'am' (delete)
            '{a'}m' (<- 'a')
            '{e'}m' (<- 'e')
            'm' (delete)
            'od' 'ed' 'ad' '{o"}d' (delete)
            '{a'}d' (<- 'a')
            '{e'}d' (<- 'e')
            'd' (delete)
            'ja' 'je' (delete)
            'a' 'e' 'o' (delete)
            '{a'}' (<- 'a')
            '{e'}' (<- 'e')
        )
    )

    // plur_owner: within R1, handle the listed endings built on 'i' ('im',
    // 'id', 'i', 'ink', 'itok'/'itek', 'ik' and their longer variants).
    // Forms that begin with an acute-accented a/e are replaced by the plain
    // vowel; the rest are deleted.
    // Note: 'ink' has no action of its own; in an among list a string without
    // a bracketed command shares the next command that follows, so it takes
    // the (delete) written after 'jaitok' 'jeitek'.
    define plur_owner as (
        [substring] R1 among(
            'jaim' 'jeim' (delete)
            '{a'}im' (<- 'a')
            '{e'}im' (<- 'e')
            'aim' 'eim' (delete)
            'im' (delete)
            'jaid' 'jeid' (delete)
            '{a'}id' (<- 'a')
            '{e'}id' (<- 'e')
            'aid' 'eid' (delete)
            'id' (delete)
            'jai' 'jei' (delete)
            '{a'}i' (<- 'a')
            '{e'}i' (<- 'e')
            'ai' 'ei' (delete)
            'i' (delete)
            'jaink' 'jeink' (delete)
            'eink' 'aink' (delete)
            '{a'}ink' (<- 'a')
            '{e'}ink' (<- 'e')
            'ink'
            'jaitok' 'jeitek' (delete)
            'aitok' 'eitek' (delete)
            '{a'}itok' (<- 'a')
            '{e'}itek' (<- 'e')
            'itek' (delete)
            'jeik' 'jaik' (delete)
            'aik' 'eik' (delete)
            '{a'}ik' (<- 'a')
            '{e'}ik' (<- 'e')
            'ik' (delete)
        )
    )
 )

 // stem: entry point.  First compute region R1 (mark_regions), then, working
 // backwards from the end of the word, attempt each suffix routine in the
 // order listed.  Every call is wrapped in `do`, so a routine that fails
 // does not prevent the following ones from running.
 define stem as (
    do mark_regions
    backwards (
      do instrum
        do case
        do case_special
        do case_other
        do factive
        do owned
        do sing_owner
        do plur_owner
        do plural
    )
 )
--- a/contrib/snowball/algorithms/indonesian.sbl
+++ b/contrib/snowball/algorithms/indonesian.sbl
@@ -0,0 +1,192 @@
 // An implementation of the "Porter Stemmer for Bahasa Indonesia" from:
 // http://www.illc.uva.nl/Research/Publications/Reports/MoL-2003-02.text.pdf

 integers (
    // The paper defines measure as the number of vowels in the word.  We
    // count this initially, then adjust the count each time we remove a
    // prefix or suffix.
    measure

    // Numeric code for the type of prefix removed:
    //
    // 0 other/none
    // 1 'di' or 'meng' or 'ter'
    // 2 'per'
    // 3 'ke' or 'peng'
    // 4 'ber'
    //
    // Some of these have variant forms, so e.g. "meng" includes "men", "me",
    // "meny", "mem".
    //
    // Note that the value of prefix is only used in remove_suffix (and
    // routines it calls) so we don't need to worry about
    // remove_second_order_prefix overwriting a value of prefix set by
    // remove_first_order_prefix since remove_suffix gets called between
    // the two.
    prefix
 )

 groupings ( vowel )

 routines (
    remove_particle
    remove_possessive_pronoun
    remove_first_order_prefix
    remove_second_order_prefix
    remove_suffix
    KER
    SUFFIX_KAN_OK
    SUFFIX_AN_OK
    SUFFIX_I_OK
    VOWEL
 )

 externals ( stem )

 stringescapes {}

 backwardmode (

    // Delete a trailing 'kah', 'lah' or 'pun' and decrement measure.
    define remove_particle as (
        [substring] among (
            'kah'
            'lah'
            'pun'
                ( delete  $measure -= 1 )
        )
    )

    // Delete a trailing 'ku', 'mu' or 'nya' and decrement measure.
    define remove_possessive_pronoun as (
        [substring] among (
            'ku'
            'mu'
            'nya'
                ( delete  $measure -= 1 )
        )
    )

    // Condition for removing suffix "kan": prefix not in {ke, peng, per},
    // i.e. the prefix code is neither 3 nor 2.
    define SUFFIX_KAN_OK as (
        // On page 29, the example "kompas Q.31" says "Both Nazief and Porter
        // stemmer converted the word peledakan (blast, explotion) to ledak (to
        // blast, to explode)".  However, the algorithm as described doesn't
        // behave in this way - grammatically the prefix pe- occurs as a
        // variation of both the first-order derivational prefix peng- and the
        // second-order derivational prefix per-, but table 2.5 doesn't include
        // "pe", only table 2.6 does, so "peledakan" is handled (incorrectly)
        // as having prefix "per" not "peng", and so we remove derivational
        // suffix "kan" rather than "an" to give stem leda.  (Porter-style
        // stemmers remove the longest suffix they can amongst those available,
        // which this paper notes in the last paragraph on page 15).
        //
        // We resolve this by amending the condition on suffix "kan" to
        // "prefix ∉ {ke, peng, per}", which seems to make the stemmer's
        // behaviour match all the examples in the paper except for one:
        // "perbaikan" is shown in table 3.4 as stemming to "bai", but with
        // this change it now stems to "baik".  The table notes that "baik" is
        // the actual root so this deviation is an improvement.  In a sample
        // vocabulary derived from the most common words in id.wikipedia.org,
        // this change only affects 0.12% of words (76 out of 64,587, including
        // "peledakan" and "perbaikan").
        not ( $prefix == 2 or $prefix == 3 )
    )

    // Condition for removing suffix "an": prefix not in {di, meng, ter},
    // i.e. the prefix code is not 1.
    define SUFFIX_AN_OK as (
        not $prefix == 1
    )

    // Condition for removing suffix "i": the prefix code must be below 3 and
    // the character test at the end of the body must succeed.
    define SUFFIX_I_OK as (
        // prefix not in {ke, peng, ber}
        $prefix < 3

        // The rest of the condition from the paper is:
        //   V|K...c₁c₁, c₁ ≠ s, c₂ ≠ i
        //
        // The meaning of this is unclear in several ways, and none of the
        // examples given of the stemmer's behaviour in the paper help to
        // resolve these issues.
        //
        // Notice that c₂ isn't actually used - the most obvious explanation
        // seems to be that "c₁c₁" should read "c₁c₂", or maybe "c₂c₁".
        //
        // Elsewhere the paper defines V... as meaning "the stem starts with
        // a vowel" and K... as meaning "the stem starts with a consonant".
        //
        // In other places where it says X|Y... it seems the | binds more
        // tightly, so it's (V|K)...cᵢcⱼ not V|(K...cᵢcⱼ).  That seems a bit
        // odd as the first letter must be either a vowel or a consonant, so
        // that really just means "ends cᵢcⱼ".  However, nowhere in the paper
        // uses or defines a notation such as ...X, which may explain this
        // seemingly redundant way of specifying this.
        //
        // The conditions elsewhere on prefix removal (e.g. V...) are clearly
        // on the stem left after the prefix is removed.  None of the other
        // rules for suffix removal have conditions on the stem, but for
        // consistency with the prefix rules we might expect that the cᵢcⱼ
        // test is on what's left *after* removing the "i" suffix.
        //
        // However, studying Indonesian wordlists and discussion with a native
        // speaker leads us to conclude that the purpose of this check is to
        // protect words of foreign origin (e.g. "televisi", "organisasi",
        // "komunikasi") from stemming, and the common feature of these is
        // that the word ends "-si", so we conclude that the condition here
        // should be read as "word does not end -si", and this is what we
        // have implemented.
        not 's'
    )

    // Delete one of the suffixes 'kan', 'an' or 'i' and decrement measure.
    // Each suffix only counts as a match when its guard routine succeeds.
    define remove_suffix as (
        [substring] among (
            'kan' SUFFIX_KAN_OK
            'an'  SUFFIX_AN_OK
            'i'   SUFFIX_I_OK
                ( delete  $measure -= 1 )
        )
    )
 )

 // Grouping of the five plain vowels.
 define vowel 'aeiou'

 // Routine wrapper around the vowel grouping; used as a guard on among
 // strings in remove_first_order_prefix.
 define VOWEL as (
    vowel
 )

 // A non-vowel followed by 'er'; used as the guard on prefix 'be' in
 // remove_second_order_prefix.
 define KER as (
    non-vowel
    'er'
 )

 // Remove (or rewrite) one of the prefixes below, record its numeric type
 // in prefix (1 or 3) and decrement measure.
 define remove_first_order_prefix as (
    [substring] among (
        // type 1, plain removal
        'di'
        'meng'
        'men'
        'me'
        'ter'
            ( delete  $prefix = 1  $measure -= 1 )
        // type 3, plain removal
        'ke'
        'peng'
        'pen'
            ( delete  $prefix = 3  $measure -= 1 )
        // 'meny' / 'peny' only match when VOWEL succeeds; they become 's'
        'meny' VOWEL
            ( $prefix = 1  <- 's'  $measure -= 1 )
        'peny' VOWEL
            ( $prefix = 3  <- 's'  $measure -= 1 )
        // 'mem' / 'pem' become 'p' when a vowel follows, else are deleted
        'mem'
            ( $prefix = 1  $measure -= 1  vowel and <- 'p' or delete )
        'pem'
            ( $prefix = 3  $measure -= 1  vowel and <- 'p' or delete )
    )
 )

 // Remove (or rewrite) one of the prefixes below and decrement measure;
 // most rules also record the prefix type (2 or 4) in prefix.
 define remove_second_order_prefix as (
    // The paper has the condition on removal of prefix "bel" and "pel" as
    // just "ajar" not "ajar..." but it seems that the latter must be what
    // is intended so that e.g. "pelajaran" stems to "ajar" not "lajar".
    // This change only affects a very small number of words (11 out of
    // 64,587) and only for the better.
    [substring] among (
        'per'
        'pe'
            ( delete  $prefix = 2  $measure -= 1 )
        // note: this rule leaves prefix untouched
        'pelajar'
            ( <- 'ajar'  $measure -= 1 )
        'ber'
            ( delete  $prefix = 4  $measure -= 1 )
        'belajar'
            ( <- 'ajar'  $prefix = 4  $measure -= 1 )
        // 'be' only matches when KER succeeds
        'be' KER
            ( delete  $prefix = 4  $measure -= 1 )
    )
 )

 // Entry point.  The bare `$measure > 2` lines are guards: when one of
 // them fails, stem signals failure and the steps after it are not run.
 define stem as (
    // measure := number of vowels in the word
    $measure = 0
    do ( repeat ( gopast vowel $measure+=1 ) )
    $measure > 2
    $prefix = 0
    // particle first, then (if the word is still long enough) the
    // possessive pronoun
    backwards (
        do remove_particle
        $measure > 2
        do remove_possessive_pronoun
    )
    $measure > 2
    // If a first-order prefix can be removed: try a suffix, then a
    // second-order prefix.  Otherwise: try a second-order prefix, then a
    // suffix.
    test (
        remove_first_order_prefix
        do (
            test ($measure > 2 backwards remove_suffix)
            $measure > 2 remove_second_order_prefix
        )
    ) or (
        do remove_second_order_prefix
        do ($measure > 2 backwards remove_suffix)
    )
 )
--- a/contrib/snowball/algorithms/irish.sbl
+++ b/contrib/snowball/algorithms/irish.sbl
@@ -0,0 +1,151 @@
 routines (
  R1 R2 RV
  initial_morph
  mark_regions
  noun_sfx
  deriv
  verb_sfx
 )

 externals ( stem )

 integers ( pV p1 p2 )

 groupings ( v )

 stringescapes {}

 /* Accented characters */

 stringdef a'   '{U+00E1}'  // a-acute
 stringdef e'   '{U+00E9}'  // e-acute
 stringdef i'   '{U+00ED}'  // i-acute
 stringdef o'   '{U+00F3}'  // o-acute
 stringdef u'   '{U+00FA}'  // u-acute

 define v 'aeiou{a'}{e'}{i'}{o'}{u'}'

 // Set the marks pV, p1 and p2.  Each starts at `limit` and is only moved
 // if the corresponding scan below succeeds.
 define mark_regions as (
    $pV = limit  $p1 = limit  $p2 = limit    // defaults
    // pV: just past the first vowel
    do ( gopast v  setmark pV )
    // p1: just past the first non-vowel that follows a vowel;
    // p2: the same scan repeated, continuing from p1
    do (
        gopast v  gopast non-v  setmark p1
        gopast v  gopast non-v  setmark p2
    )
 )

 // Remove or rewrite one of the word-initial sequences listed below.
 // stem calls this in forward mode before mark_regions.
 define initial_morph as (
  [substring] among (
    // nAthair -> n-athair, but alone are problematic
    'h-' 'n-' 't-'  (delete)
    // verbs
    'd{'}'          (delete)
    'd{'}fh'        (<- 'f')
    // other contractions
    'm{'}' 'b{'}'   (delete)
    'sh'            (<- 's')
    'mb'            (<- 'b')
    'gc'            (<- 'c')
    'nd'            (<- 'd')
    'bhf'           (<- 'f')
    'ng'            (<- 'g')
    'bp'            (<- 'p')
    'ts'            (<- 's')
    'dt'            (<- 't')
    // Lenition
    'bh'            (<- 'b')
    'ch'            (<- 'c')
    'dh'            (<- 'd')
    'fh'            (<- 'f')
    'gh'            (<- 'g')
    'mh'            (<- 'm')
    'ph'            (<- 'p')
    'th'            (<- 't')
  )
 )

 backwardmode (

  // Region tests: each succeeds when the cursor is at or beyond the mark.
  define RV as ( $pV <= cursor )
  define R1 as ( $p1 <= cursor )
  define R2 as ( $p2 <= cursor )

  // Delete one of the endings below, provided the region test for its
  // group (R1 or R2) succeeds.
  define noun_sfx as (
    [substring] among (
      'amh'        'eamh'
      'abh'        'eabh'
      'aibh'       'ibh'
      'aimh'       'imh'
      'a{i'}ocht'  '{i'}ocht'
      'a{i'}ochta' '{i'}ochta'
          (R1 delete)
      'ire'        'aire'
      'ir{i'}'     'air{i'}'
          (R2 delete)
    )
  )
  // Delete the first group of endings when R2 succeeds; the other groups
  // are replaced by the string given in their action.
  define deriv as (
    [substring] among (
      //siopadóireacht -> siopadóir but not poblacht -> pobl
      'ach'           'each'
      'acht'          'eacht'
      'achta'         'eachta'
      'acht{u'}il'    'eacht{u'}il'
          (R2 delete)
      // monarcacht -> monarc
      'arcacht'
      'arcachta'
      'arcachta{i'}'
          (<- 'arc')
      'gineach'
      'gineas'
      'ginis'
          (<- 'gin')
      'grafa{i'}och'
      'grafa{i'}ocht'
      'grafa{i'}ochta'
      'grafa{i'}ochta{i'}'
          (<- 'graf')
      'paite'
      'patach'
      'pataigh'
      'patacha'
          (<- 'paite')
      '{o'}ideach'
      '{o'}ideacha'
      '{o'}idigh'
          (<- '{o'}id')
    )
  )
  // Delete one of the endings below, provided the region test for its
  // group (RV or R1) succeeds.
  define verb_sfx as (
    [substring] among (
      'imid'      'aimid'
      '{i'}mid'   'a{i'}mid'
      'fidh'      'faidh'
          (RV delete)
      'ain'
      'adh'       'eadh'
      '{a'}il'
      'tar'       'tear'
          (R1 delete)
    )
  )
 )

 // Entry point.  initial_morph runs before mark_regions, so the marks are
 // computed on the word as initial_morph left it.  The three suffix
 // routines then run in backward mode; each is wrapped in `do`, so one
 // failing does not stop the next.
 define stem as (
  do initial_morph
  do mark_regions
  backwards (
    do noun_sfx  do deriv  do verb_sfx
  )
 )
--- a/contrib/snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl
@@ -16,18 +16,18 @@ groupings ( v AEIO CG )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef a'   hex 'A0'
 stringdef a`   hex '85'
 stringdef e'   hex '82'
 stringdef e`   hex '8A'
 stringdef i'   hex 'A1'
 stringdef i`   hex '8D'
 stringdef o'   hex 'A2'
 stringdef o`   hex '95'
 stringdef u'   hex 'A3'
 stringdef u`   hex '97'
 /* special characters */

 stringdef a'   '{U+00E1}'
 stringdef a`   '{U+00E0}'
 stringdef e'   '{U+00E9}'
 stringdef e`   '{U+00E8}'
 stringdef i'   '{U+00ED}'
 stringdef i`   '{U+00EC}'
 stringdef o'   '{U+00F3}'
 stringdef o`   '{U+00F2}'
 stringdef u'   '{U+00FA}'
 stringdef u`   '{U+00F9}'

 define v 'aeiou{a`}{e`}{i`}{o`}{u`}'

--- a/contrib/snowball/algorithms/italian/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/italian/stem_ISO_8859_1.sbl
@@ -1,195 +0,0 @@

 routines (
           prelude postlude mark_regions
           RV R1 R2
           attached_pronoun
           standard_suffix
           verb_suffix
           vowel_suffix
 )

 externals ( stem )

 integers ( pV p1 p2 )

 groupings ( v AEIO CG )

 stringescapes {}

 /* special characters (in ISO Latin I) */

 stringdef a'   hex 'E1'
 stringdef a`   hex 'E0'
 stringdef e'   hex 'E9'
 stringdef e`   hex 'E8'
 stringdef i'   hex 'ED'
 stringdef i`   hex 'EC'
 stringdef o'   hex 'F3'
 stringdef o`   hex 'F2'
 stringdef u'   hex 'FA'
 stringdef u`   hex 'F9'

 define v 'aeiou{a`}{e`}{i`}{o`}{u`}'

 define prelude as (
    test repeat (
        [substring] among(
            '{a'}' (<- '{a`}')
            '{e'}' (<- '{e`}')
            '{i'}' (<- '{i`}')
            '{o'}' (<- '{o`}')
            '{u'}' (<- '{u`}')
            'qu'   (<- 'qU')
            ''     (next)
        )
    )
    repeat goto (
        v [ ('u' ] v <- 'U') or
            ('i' ] v <- 'I')
    )
 )

 define mark_regions as (

    $pV = limit
    $p1 = limit
    $p2 = limit // defaults

    do (
        ( v (non-v gopast v) or (v gopast non-v) )
        or
        ( non-v (non-v gopast v) or (v next) )
        setmark pV
    )
    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
 )

 define postlude as repeat (

    [substring] among(
        'I'  (<- 'i')
        'U'  (<- 'u')
        ''   (next)
    )

 )

 backwardmode (

    define RV as $pV <= cursor
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    define attached_pronoun as (
        [substring] among(
            'ci' 'gli' 'la' 'le' 'li' 'lo'
            'mi' 'ne' 'si'  'ti' 'vi'
            // the compound forms are:
            'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
            'mela' 'mele' 'meli' 'melo' 'mene'
            'tela' 'tele' 'teli' 'telo' 'tene'
            'cela' 'cele' 'celi' 'celo' 'cene'
            'vela' 'vele' 'veli' 'velo' 'vene'
        )
        among( (RV)
            'ando' 'endo'   (delete)
            'ar' 'er' 'ir'  (<- 'e')
        )
    )

    define standard_suffix as (
        [substring] among(

            'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
            'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
            'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
            'atrice' 'atrici'
            'ante' 'anti' // Note 1
               ( R2 delete )
            'azione' 'azioni' 'atore' 'atori'
               ( R2 delete
                 try ( ['ic'] R2 delete )
               )
            'logia' 'logie'
               ( R2 <- 'log' )
            'uzione' 'uzioni' 'usione' 'usioni'
               ( R2 <- 'u' )
            'enza' 'enze'
               ( R2 <- 'ente' )
            'amento' 'amenti' 'imento' 'imenti'
               ( RV delete )
            'amente' (
                R1 delete
                try (
                    [substring] R2 delete among(
                        'iv' ( ['at'] R2 delete )
                        'os' 'ic' 'abil'
                    )
                )
            )
            'it{a`}' (
                R2 delete
                try (
                    [substring] among(
                        'abil' 'ic' 'iv' (R2 delete)
                    )
                )
            )
            'ivo' 'ivi' 'iva' 'ive' (
                R2 delete
                try ( ['at'] R2 delete ['ic'] R2 delete )
            )
        )
    )

    define verb_suffix as setlimit tomark pV for (
        [substring] among(
            'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
            'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
            'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
            'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
            'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
            'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
            'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
            'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
            'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
            'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
            'ono' 'uta' 'ute' 'uti' 'uto'

            'ar' 'ir' // but 'er' is problematical
                (delete)
        )
    )

    define AEIO 'aeio{a`}{e`}{i`}{o`}'
    define CG 'cg'

    define vowel_suffix as (
        try (
            [AEIO] RV delete
            ['i'] RV delete
        )
        try (
            ['h'] CG RV delete
        )
    )
 )

 define stem as (
    do prelude
    do mark_regions
    backwards (
        do attached_pronoun
        do (standard_suffix or verb_suffix)
        do vowel_suffix
    )
    do postlude
 )

 /*
    Note 1: additions of 15 Jun 2005
 */

--- a/contrib/snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
@@ -1,5 +1,5 @@
 strings ( ch )
 integers ( x p1 p2 )
 integers ( p1 p2 )
 booleans ( Y_found stemmed GE_removed )

 routines (
@@ -20,8 +20,6 @@ groupings ( v v_WX AOU AIOU )

 stringescapes {}

 stringdef '   hex '27'  // yuk

 define v        'aeiouy'
 define v_WX     v + 'wx'
 define AOU      'aou'
@@ -29,8 +27,8 @@ define AIOU     'aiou'

 backwardmode (

    define R1 as (setmark x $x >= p1)
    define R2 as (setmark x $x >= p2)
    define R1 as ($p1 <= cursor)
    define R2 as ($p2 <= cursor)

    define V  as test (v or 'ij')
    define VX as test (next v or 'ij')
@@ -46,7 +44,7 @@ backwardmode (

    define Step_1 as
    (
        [among ( (])
        [substring] among (

            '{'}s' (delete)
            's'    (R1 not ('t' R1) C delete)
@@ -68,7 +66,7 @@ backwardmode (

    define Step_2 as
    (
        [among ( (])
        [substring] among (
            'je'   (('{'}t' ] delete) or
                    ('et'   ] R1 C delete) or
                    ('rnt'  ] <-'rn') or
@@ -92,7 +90,7 @@ backwardmode (

    define Step_3 as
    (
        [among ( (])
        [substring] among (
            'atie'  (R1 <-'eer')
            'iteit' (R1 delete lengthen_V)
            'heid'
@@ -112,7 +110,7 @@ backwardmode (

    define Step_4 as
    (
        (   [among ( (])
        (   [substring] among (
                'ioneel'  (R1 <-'ie')
                'atief'   (R1 <-'eer')
                'baar'    (R1 delete)
@@ -132,7 +130,7 @@ backwardmode (
            )
        )
        or
        (   [among ( (])
        (   [substring] among (
                'iger'
                'igst'
                'ig'      (R1 C delete lengthen_V)
@@ -142,7 +140,7 @@ backwardmode (

    define Step_7 as
    (
        [among ( (])
        [substring] among (
            'kt'   (<-'k')
            'ft'   (<-'f')
            'pt'   (<-'p')
@@ -151,7 +149,7 @@ backwardmode (

    define Step_6 as
    (
        [among ( (])
        [substring] among (
            'bb'   (<-'b')
            'cc'   (<-'c')
            'dd'   (<-'d')
@@ -179,7 +177,7 @@ backwardmode (

    define Step_1c as
    (
        [among ( (] R1 C)
        [substring] among ( (R1 C)
            'd' (not ('n' R1) delete)
            't' (not ('h' R1) delete)
        )
@@ -200,11 +198,8 @@ define Lose_infix as (
 )

 define measure as (
    do (
        tolimit
        setmark p1
        setmark p2
    )
    $p1 = limit
    $p2 = limit
    do(
        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p1
        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p2
--- a/contrib/snowball/algorithms/lithuanian.sbl
+++ b/contrib/snowball/algorithms/lithuanian.sbl
@@ -0,0 +1,373 @@
 externals ( stem )

 // escape symbols for substituting lithuanian characters
 stringescapes { }

 /* Special characters in Unicode Latin Extended-A */
 // ' nosinė (ogonek)
 stringdef a'   '{U+0105}'  // ą a + ogonek
 stringdef e'   '{U+0119}'  // ę e + ogonek
 stringdef i'   '{U+012F}'  // į i + ogonek
 stringdef u'   '{U+0173}'  // ų u + ogonek

 // . taškas (dot above)
 stringdef e.   '{U+0117}'  // ė e + dot

 // - ilgoji (macron)
 stringdef u-   '{U+016B}'  // ū u + macron

 // * varnelė (caron)
 stringdef c*   '{U+010D}'  // č c + caron (haček)
 stringdef s*   '{U+0161}'  // š s + caron (haček)
 stringdef z*   '{U+017E}'  // ž z + caron (haček)

 // [C](VC)^m[V|C]
 // definitions of variables for
 // p1 - position of m = 0
 integers ( p1 )

 // groupings
 // v - lithuanian vowels
 groupings ( v )

 // v - all lithuanian vowels
 define v 'aeiyou{a'}{e'}{i'}{u'}{e.}{u-}'

 // all lithuanian stemmer routines: 4 steps
 routines (
  step2 R1 step1 fix_chdz fix_gd fix_conflicts
 )

 backwardmode (

  // Succeeds when the cursor is at or beyond the mark p1.
  define R1 as ( $p1 <= cursor )
  // Step 1: delete one of the endings listed below.  The substring search
  // is limited to the part of the word after mark p1 (setlimit tomark p1),
  // R1 must succeed, and the matched slice is then deleted.  The comment
  // after each entry gives example word forms.
  define step1 as (
    setlimit tomark p1 for ([substring]) R1 among (
      // Daiktavardžiai (Nouns)
      // I linksniuotė (declension I)
      'as' 'ias' 'is' 'ys'        // vyras, kelias, brolis, gaidys
      'o'    'io'                 // vyro, kelio
      'ui'   'iui'                // vyrui, keliui
      '{a'}' 'i{a'}' '{i'}'       // vyrą, kelią, brolį
      'u'    'iu'                 // vyru, keliu
      'e'    'yje'                // vyre, kelyje
      'y'    'au'    'i'          // kely, brolau, broli,
      'an'                        // nusižengiman

      'ai'   'iai'                // vyrai, keliai
      '{u'}' 'i{u'}'              // vyrų, kelių
      'ams'  'am'                 // vyrams, vyram
      'iams' 'iam'                // broliams, broliam
      'us'   'ius'                // vyrus, brolius
      'ais'  'iais'               // vyrais, keliais
      'uose' 'iuose' 'uos' 'iuos' // vyruose, keliuose, vyruos, keliuos
      'uosna' 'iuosna'            // vyruosna, keliuosna
      'ysna'                      // žutysna

      'asis' 'aisi'               // sukimasis, sukimaisi
      'osi'  '{u'}si'             // sukimosi, sukimųsi
      'uisi'                      // sukimuisi
      '{a'}si'                    // sukimąsi
      'usi'                       // sukimusi
      'esi'                       // sukimesi

      'uo'                        // mėnuo


      // II linksniuote (declension II)
      'a'  'ia'                   // galva, vysnios
      'os' 'ios'                  // galvos, vysnios
      'oj' 'oje' 'ioje'           // galvoje, vysnioje
      'osna' 'iosna'              // galvosna, vyšniosna
      'om' 'oms' 'ioms'           // galvoms, vysnioms
      'omis' 'iomis'              // galvomis, vysniomis
      'ose' 'iose'                // galvose, vysniose
      'on' 'ion'                  // galvon, vyšnion


      // III linksniuote (declension III)
      '{e.}'                      // gervė
      '{e.}s'                     // gervės
      'ei'                        // gervei
      '{e'}'                      // gervę
      '{e.}j' '{e.}je'            // gervėj, gervėje
      '{e.}ms'                    // gervėms
      'es'                        // gerves
      '{e.}mis'                   // gervėmis
      '{e.}se'                    // gervėse
      '{e.}sna'                   // gervėsna
      '{e.}n'                     // žydaitėn


      // IV linksniuote (declension IV)
      'aus' 'iaus'                // sūnaus, skaičiaus
      'umi' 'iumi'                // sūnumi, skaičiumi
      'uje' 'iuje'                // sūnuje, skaičiuje
      'iau'                       // skaičiau

      '{u-}s'                     // sūnūs
      'ums'                       // sūnums
      'umis'                      // sūnumis
      'un' 'iun'                  // sūnun, administratoriun


      // V linksniuote (declension V)
      'ies' 'ens' 'enio' 'ers'    // avies, vandens, sesers
      'eniui' 'eriai'             // vandeniui, eriai
      'en{i'}' 'er{i'}'           // vandenį, seserį
      'imi' 'eniu' 'erimi' 'eria' // avimi, vandeniu, seserimi, seseria
      'enyje' 'eryje'             // vandenyje, seseryje
      'ie' 'enie' 'erie'          // avie, vandenie, seserie

      'enys' 'erys'               // vandenys, seserys
      // 'en{u'}' not included: conflicts with 'žandenų', 'antenų'
      'er{u'}'                    // seserų
      'ims' 'enims' 'erims'       // avims, vandenims, seserims
      'enis'                      // vandenis
      'imis'                      // žebenkštimis
      'enimis'                    // vandenimis
      'yse' 'enyse' 'eryse'       // avyse, vandenyse, seseryse


      // Būdvardžiai (Adjectives)
      // (i)a linksniuotė ((i)a declension)
      'iem' 'iems'                // geriem, geriems
      'ame' 'iame'                // naujame, mediniame


      // Veiksmažodžiai (Verbs)
      // Tiesioginė nuosaka (indicative mood)
      // esamasis laikas (present tense)
      // (i)a asmenuotė (declension (i)a)
      'uosi' 'iuosi'              // dirbuosi, traukiuosi
      'iesi'                      // dirbiesi
      'asi' 'iasi'                // dirbasi, traukiasi
      'am{e.}s' 'iam{e.}s'        // dirbamės, traukiamės
      'at' 'ate' 'iat' 'iate'     // dirbat, dirbate, ariat, traukiate
      'at{e.}s' 'iat{e.}s'        // dirbatės, traukiatės

      // i asmenuotė (declension i)
      'isi'                       // tikisi
      'im'                        // mylim
      // 'ime' not included: conflicts with the noun locative, e.g. 'gėrime'
      'im{e.}s'                   // tikimės
      'it' 'ite'                  // mylit, mylite, tikitės
      // 'it{e.}s' not included: conflicts with the suffix plus nominative plural ending -ait-ės, e.g. žydaitės

      // o asmenuotė (declension o)
      'ome'                       // mokome
      'ot' 'ote'                  // mokot, mokote

      // būtasis laikas (past tense)
      // o asmenuotė (declension o)
      '{e.}jo' '{e.}josi'         // tikėjo, tikėjosi
      'ot{e.}s'                   // tikėjotės/bijotės

      // ė asmenuotė (declension ė)
      'eisi'                      // mokeisi
      '{e.}si'                    // mokėsi
      '{e.}m' '{e.}me'            // mokėm, mokėme
      '{e.}m{e.}s'                // mokėmės
      '{e.}t' '{e.}te'            // mokėt, mokėte
      '{e.}t{e.}s'                // mokėtės

      // būtasis dažninis laikas (frequentative past tense)
      'ausi'                      // mokydavausi
      'om{e.}s'                   // mokydavomės/bijomės


      // būsimasis laikas (future tense)
      'siu' 'siuosi'              // dirbsiu, mokysiuosi
      'si' 'siesi'                // dirbsi, dirbsiesi
      's' 'ysis'                  // dirbs, mokysis
      'sim' 'sime'                // dirbsim, dirbsime
      'sit' 'site'                // gersit, gersite

      // tariamoji nuosaka (subjunctive mood)
      '{c*}iau' '{c*}iausi'       // dirbčiau
      'tum' 'tumei'               // dirbtum, dirbtumei
      'tumeis' 'tumeisi'          // mokytumeis, mokytumeisi
      // 't{u'}' not included: it would wrongly stem batutų -> batų
      't{u'}si'                   // mokytųsi
      // 'tume' not included: conflicts with 'šventume'
      'tum{e.}m'                  // dirbtumėm
      'tum{e.}me'                 // dirbtumėme
      'tum{e.}m{e.}s'             // mokytumėmės
      'tute' 'tum{e.}t'           // dirbtute, dirbtumėt
      'tum{e.}te'                 // dirbtumėte
      'tum{e.}t{e.}s'             // mokytumėtės

      // liepiamoji nuosaka (imperative mood)
      'k' 'ki'                    // dirbk, dirbki, mokykis
      // 'kis' not included: conflicts with viln-išk-is
      // 'kime' not included: conflicts with pirkime
      'kim{e.}s'                  // mokykimės

      // bendratis (infinitive)
      'uoti' 'iuoti'              // meluoti, dygsniuoti
      'auti' 'iauti'              // draugauti, girtuokliauti
      'oti' 'ioti'                // dovanoti, meškerioti
      '{e.}ti'                    // auklėti
      'yti'                       // akyti
      'inti'                      // auginti
      'in{e.}ti'                  // blusinėti
      'enti'                      // gyventi
      'tel{e.}ti'                 // bumbtelėti
      'ter{e.}ti'                 // bumbterėti

      'ti'                        // skalbti
      // 'tis' not included: conflict, since rytme-tis -> rytme

      // dalyviai (participles)
      '{a'}s' 'i{a'}s' '{i'}s'    // dirbąs, žaidžiąs, gulįs
      't{u'}s'                    // suktųs -> suk
      'sim{e.}s'                  // suksimės
      'sit{e.}s'                  // suksitės
      'kite'                      // supkite
    )

    delete
  )

  // Step 2: repeatedly delete one of the suffixes listed below until none
  // matches.  As in step1 the search is limited to the part of the word
  // after mark p1, but there is no separate R1 test here.  The comment
  // after each entry shows an example word with the suffix hyphenated.
  define step2 as repeat (
    setlimit tomark p1 for ([substring]) among (
      // daiktavardziu priesagos (Noun suffixes)

      // budvardziu priesagos (Adjective suffixes)
      // 'in' not included: conflicts with 'augintinis' and 'akiniais' // lauk-in-is
      'ing'                       // tvark-ing-as
      'i{s*}k'                    // lenk-išk-as
      '{e.}t'                     // dem-ėt-as
      'ot'                        // garban-ot-as
      'uot' 'iuot'                // lang-uot-as, akin-iuot-as
      // 'tin' not included because of augintinis    // dirb-tin-is
      // 'ut' not included because of batutas, degutas etc. // maž-ut-is
      'yt'                        // maž-yt-is
      'iuk'                       // maž-iuk-as
      'iul'                       // maž-ul-is
      '{e.}l'                     // maž-ėl-is
      'yl'                        // maž-yl-is
      'u{c*}iuk'                  // maž-učiuk-as
      'uliuk'                     // maž-uliuk-as
      'ut{e.}ait'                 // maž-utėlait-is
      'ok'                        // did-ok-as
      'iok'                       // višč-iok-as
      'sv' '{s*}v' 'zgan'         // sal-sv-as, pilk-šv-as, bal-zgan-as
      'op' 'iop'                  // dvej-op-as, viener-iop-as
      'ain'                       // apval-ain-as
      'yk{s*}t' 'yk{s*}{c*}'      // ten-ykšt-is, vakar-ykšč-ias

      // laisniai (degrees of comparison)
      'esn'                       // did-esn-is
      'aus' 'iaus'                // nauj-aus-ias, ger-iaus-ias

      // ivardziuotiniai budvardziai (Pronominal adjectives)
      // vyriska gimine (Male gender)
      'ias'                       // žaliasis
      'oj' 'ioj'                  // gerojo, žaliojo
      'aj' 'iaj'                  // gerajam, žaliajam
      '{a'}j' 'i{a'}j'            // garąjį, žaliąjį
      'uoj' 'iuoj'                // geruoju, žaliuoju
      'iej'                       // gerieji
      '{u'}j' 'i{u'}j'            // gerųjų, žaliųjų
      'ies'                       // geriesiems
      'uos' 'iuos'                // geruosius, žaliuosius
      'ais' 'iais'                // geraisiais, žaliaisiais

      // moteriska gimine (Female gender)
      'os' 'ios'                  // gerosios, žaliosios
      '{a'}s' 'i{a'}s'            // gerąsios, žaliąsias

      // būtasis dažninis laikas (frequentative past tense)
      'dav'                       // ei-dav-o

      // dalyvių priesagos (participle suffixes)
      'ant' 'iant'
      'int'                       // tur-int-is
      '{e.}j'                     // tur-ėj-o
      '{e'}'                      //
      '{e.}j{e'}'
      '{e'}s'                     // dirb-ęs-is

      'siant'                     // dirb-siant

      // pusdalyviai (half-participles)
      'dam'                       // bėg-dam-as

      'auj'                       // ūkinink-auj-a
      'jam'
      'iau'
      'am'                        // baiminim-ams-i
    )

    delete
  )

  // Rewrite a word ending that would otherwise be confused with another
  // ending (the conflicting form is quoted on each rule).  stem runs this
  // before step1.  Rules producing the same replacement are listed together.
  define fix_conflicts as (
    [substring] among (
      // 'lietuvaite'  -> 'lietuvaitė', conflicts with 'myl-ite'
      // 'lietuvaitės' -> 'lietuvaitė', conflicts with 'myl-itės'
      'aite' 'ait{e.}s'
          (<- 'ait{e.}')
      // 'ūs-uotės' -> 'ūs-uotė', conflicts with 'mokotės'
      // 'ūs-uote'  -> 'ūs-uotė', conflicts with 'mokote'
      'uot{e.}s' 'uote'
          (<- 'uot{e.}')
      // 'žerėjime' -> 'žėrėjimas', conflicts with 'žais-ime'
      '{e.}jime'
          (<- '{e.}jimas')
      // 'žvilgesiu' -> 'žvilgesys', conflicts with 'dirb-siu'
      'esiu'
          (<- 'esys')
      // 'duobkasiu' -> 'duobkasys', conflicts with 'pakasiu'
      // NOTE(review): the pattern is 'asius' although the example word
      // ends in 'asiu' - confirm which one is intended.
      'asius'
          (<- 'asys')
      // 'žioravime' -> 'žioravimas', conflicts with 'myl-ime'
      'avime'
          (<- 'avimas')
      'ojime'
          (<- 'ojimas')
      // 'advokatės' -> 'advokatė', conflicts with 'dirb-atės'
      // 'advokate'  -> 'advokatė', conflicts with 'dirb-ate'
      'okat{e.}s' 'okate'
          (<- 'okat{e.}')
    )
  )

  // Replace a final {c*} by 't' and a final 'd{z*}' by 'd'.
  define fix_chdz as (
    [substring] among (
      '{c*}'
          (<- 't')
      'd{z*}'
          (<- 'd')
    )
  )

  // Replace a final 'gd' by 'g'.
  define fix_gd as (
    [substring] among (
      'gd'
          (<- 'g')
      // disabled rule: '{e.}k' (<-'{e.}g')
    )
  )

 )

 // Entry point.  Sets mark p1 (default: limit), then runs the fix-up and
 // suffix-removal routines in backward mode.  Each routine is wrapped in
 // `do`, so one failing does not stop the next; fix_chdz runs both after
 // step1 and after step2.
 define stem as (
  $p1 = limit
  do (
    // Prefix 'a' in words longer than 6 letters, e.g. 'a-liejus': skip
    // over it before looking for p1.
    try ( test 'a'  $(len > 6)  hop 1 )
    // p1: just past the first non-vowel that follows a vowel
    gopast v
    gopast non-v
    setmark p1
  )
  backwards (
    do fix_conflicts
    do step1
    do fix_chdz
    do step2
    do fix_chdz
    do fix_gd
  )
 )
--- a/contrib/snowball/algorithms/lovins/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/lovins/stem_ISO_8859_1.sbl
--- a/contrib/snowball/algorithms/nepali.sbl
+++ b/contrib/snowball/algorithms/nepali.sbl
@@ -0,0 +1,92 @@
 /*
 * Authors:
 * - Ingroj Shrestha <ing.stha@gmail.com>, Nepali NLP Group
 * - Oleg Bartunov <obartunov@gmail.com>, Postgres Professional Ltd.
 * - Shreeya Singh Dhakal, Nepali NLP Group
 */

 routines (
 	remove_category_1
 	check_category_2
 	remove_category_2
 	remove_category_3
 )

 stringescapes {}

 stringdef dsc     '{U+0901}'  // DEVANAGARI_SIGN_CANDRABINDU
 stringdef dsa     '{U+0902}'  // DEVANAGARI_SIGN_ANUSVARA
 stringdef dli     '{U+0907}'  // DEVANAGARI_LETTER_I
 stringdef dlii    '{U+0908}'  // DEVANAGARI_LETTER_II
 stringdef dle     '{U+090F}'  // DEVANAGARI_LETTER_E
 stringdef dlka    '{U+0915}'  // DEVANAGARI_LETTER_KA
 stringdef dlkha   '{U+0916}'  // DEVANAGARI_LETTER_KHA
 stringdef dlg     '{U+0917}'  // DEVANAGARI_LETTER_GA
 stringdef dlc     '{U+091B}'  // DEVANAGARI_LETTER_CHA
 stringdef dlta    '{U+0924}'  // DEVANAGARI_LETTER_TA
 stringdef dltha   '{U+0925}'  // DEVANAGARI_LETTER_THA
 stringdef dld     '{U+0926}'  // DEVANAGARI_LETTER_DA
 stringdef dln     '{U+0928}'  // DEVANAGARI_LETTER_NA
 stringdef dlpa    '{U+092A}'  // DEVANAGARI_LETTER_PA
 stringdef dlpha   '{U+092B}'  // DEVANAGARI_LETTER_PHA
 stringdef dlb     '{U+092D}'  // DEVANAGARI_LETTER_BHA
 stringdef dlm     '{U+092E}'  // DEVANAGARI_LETTER_MA
 stringdef dly     '{U+092F}'  // DEVANAGARI_LETTER_YA
 stringdef dlr     '{U+0930}'  // DEVANAGARI_LETTER_RA
 stringdef dll     '{U+0932}'  // DEVANAGARI_LETTER_LA
 stringdef dlv     '{U+0935}'  // DEVANAGARI_LETTER_VA
 stringdef dls     '{U+0938}'  // DEVANAGARI_LETTER_SA
 stringdef dlh     '{U+0939}'  // DEVANAGARI_LETTER_HA
 stringdef dvsaa   '{U+093E}'  // DEVANAGARI_VOWEL_SIGN_AA
 stringdef dvsi    '{U+093F}'  // DEVANAGARI_VOWEL_SIGN_I
 stringdef dvsii   '{U+0940}'  // DEVANAGARI_VOWEL_SIGN_II
 stringdef dvsu    '{U+0941}'  // DEVANAGARI_VOWEL_SIGN_U
 stringdef dvsuu   '{U+0942}'  // DEVANAGARI_VOWEL_SIGN_UU
 stringdef dvse    '{U+0947}'  // DEVANAGARI_VOWEL_SIGN_E
 stringdef dvsai   '{U+0948}'  // DEVANAGARI_VOWEL_SIGN_AI
 stringdef dvso    '{U+094B}'  // DEVANAGARI_VOWEL_SIGN_O
 stringdef dvsau   '{U+094C}'  // DEVANAGARI_VOWEL_SIGN_AU
 stringdef dsv     '{U+094D}'  // DEVANAGARI_SIGN_VIRAMA

 externals ( stem )
 backwardmode (
 	// remove_category_1: match the longest listed suffix ending at the cursor
 	// (backward mode) and apply the action of its group.
 	define remove_category_1 as(
 		[substring] among (
 			'{dlm}{dvsaa}{dlr}{dsv}{dlpha}{dlta}' '{dld}{dsv}{dlv}{dvsaa}{dlr}{dvsaa}' '{dls}{dsc}{dlg}{dvsai}' '{dls}{dsa}{dlg}'
 			'{dls}{dsc}{dlg}' '{dll}{dvsaa}{dli}' '{dll}{dvsaa}{dlii}' '{dlpa}{dlc}{dvsi}'
 			'{dll}{dvse}' '{dlr}{dlta}' '{dlm}{dvsai}' '{dlm}{dvsaa}'
 			// Group 1 (all strings above): always removed.
 			(delete)
 			// Group 2 (KA followed by a vowel sign): if the suffix is preceded by
 			// LETTER_E or VOWEL_SIGN_E, the empty command () succeeds and the word is
 			// left unchanged; otherwise the suffix is deleted.
 			'{dlka}{dvso}' '{dlka}{dvsaa}' '{dlka}{dvsi}' '{dlka}{dvsii}' '{dlka}{dvsai}'(('{dle}' or '{dvse}' ()) or delete)
 		)
 	)

 	// check_category_2: succeeds when the text before the cursor ends with
 	// CANDRABINDU, ANUSVARA or VOWEL_SIGN_AI. It has no action of its own and
 	// does not modify the word; stem uses it as a guard for remove_category_2.
 	define check_category_2 as(
 		[substring] among(
 			'{dsc}' '{dsa}' '{dvsai}'
 		)
 	)

 	// remove_category_2: delete a final sign only when a specific sequence
 	// precedes it. Only the sign matched by [substring] is deleted; the preceding
 	// sequence is tested but kept.
 	define remove_category_2 as (
 		[substring] among(
 		// CANDRABINDU / ANUSVARA: deleted only after YA+AU, CHA+AU, NA+AU or THA+E.
 		'{dsc}' '{dsa}' ('{dly}{dvsau}' or '{dlc}{dvsau}' or '{dln}{dvsau}' or '{dltha}{dvse}' delete)
 		// VOWEL_SIGN_AI: deleted only after TA + VIRAMA + RA.
 		'{dvsai}' ('{dlta}{dsv}{dlr}' delete)
 		)
 	)

 	// remove_category_3: delete the longest suffix from the list below that ends
 	// at the cursor. The routine fails when no listed suffix matches, which is
 	// what terminates the repeat loop in stem.
 	define remove_category_3 as(
 		[substring] among(
 			'{dltha}{dvsi}{dli}{dls}{dsv}' '{dlh}{dvsu}{dln}{dvse}{dlc}' '{dlh}{dvsu}{dln}{dsv}{dlc}' '{dln}{dvse}{dlc}{dls}{dsv}' '{dln}{dvse}{dlc}{dln}{dsv}' '{dli}{dle}{dlka}{dvsii}' '{dli}{dle}{dlka}{dvsaa}' '{dli}{dle}{dlka}{dvso}' '{dvsi}{dle}{dlka}{dvsii}' '{dvsi}{dle}{dlka}{dvsaa}' '{dvsi}{dle}{dlka}{dvso}' '{dli}{dlc}{dln}{dsv}' '{dvsi}{dlc}{dln}{dsv}' '{dli}{dlc}{dls}{dsv}' '{dvsi}{dlc}{dls}{dsv}' '{dle}{dlc}{dln}{dsv}' '{dvse}{dlc}{dln}{dsv}' '{dle}{dlc}{dls}{dsv}' '{dvse}{dlc}{dls}{dsv}' '{dlc}{dvsi}{dln}{dsv}' '{dlc}{dvse}{dls}{dsv}' '{dlc}{dsv}{dly}{dvsau}' '{dltha}{dvsi}{dln}{dsv}' '{dltha}{dvsi}{dly}{dvso}' '{dltha}{dvsi}{dly}{dvsau}' '{dltha}{dvsi}{dls}{dsv}' '{dltha}{dsv}{dly}{dvso}' '{dltha}{dsv}{dly}{dvsau}' '{dld}{dvsi}{dly}{dvso}' '{dld}{dvse}{dlkha}{dvsi}' '{dld}{dvse}{dlkha}{dvsii}' '{dll}{dvsaa}{dln}{dsv}' '{dlm}{dvsaa}{dltha}{dvsi}' '{dln}{dvse}{dlka}{dvsai}' '{dln}{dvse}{dlka}{dvsaa}' '{dln}{dvse}{dlka}{dvso}' '{dln}{dvse}{dlc}{dvsau}' '{dlh}{dvso}{dls}{dsv}' '{dli}{dln}{dsv}{dlc}' '{dvsi}{dln}{dsv}{dlc}' '{dln}{dvse}{dlc}{dvsu}' '{dli}{dlc}{dvsau}' '{dvsi}{dlc}{dvsau}' '{dli}{dls}{dsv}' '{dvsi}{dls}{dsv}' '{dvsi}{dly}{dvso}' '{dli}{dly}{dvso}' '{dle}{dlka}{dvsaa}' '{dvse}{dlka}{dvsaa}' '{dle}{dlka}{dvsii}' '{dvse}{dlka}{dvsii}' '{dle}{dlka}{dvsai}' '{dvse}{dlka}{dvsai}' '{dle}{dlka}{dvso}' '{dvse}{dlka}{dvso}' '{dle}{dlc}{dvsu}' '{dvse}{dlc}{dvsu}' '{dle}{dlc}{dvsau}' '{dvse}{dlc}{dvsau}' '{dlc}{dln}{dsv}' '{dlc}{dls}{dsv}' '{dltha}{dvsi}{dle}' '{dlpa}{dlr}{dsv}' '{dlb}{dly}{dvso}' '{dlh}{dlr}{dvsu}' '{dlh}{dlr}{dvsuu}' '{dvsi}{dld}{dvsaa}' '{dli}{dld}{dvsaa}' '{dvsi}{dld}{dvso}' '{dli}{dld}{dvso}' '{dvsi}{dld}{dvsai}' '{dli}{dld}{dvsai}' '{dln}{dvse}{dlc}' '{dli}{dlc}' '{dvsi}{dlc}' '{dle}{dlc}' '{dvse}{dlc}' '{dlc}{dvsu}' '{dlc}{dvse}' '{dlc}{dvsau}' '{dltha}{dvsii}' '{dltha}{dvse}' '{dld}{dvsaa}' '{dld}{dvsii}' '{dld}{dvsai}' '{dld}{dvso}' '{dln}{dvsu}' '{dln}{dvse}' '{dly}{dvso}' '{dly}{dvsau}' '{dlc}'
 			(delete)
 		)
 	)

 )

 // Entry point of the Nepali stemmer. All work happens in backward mode,
 // i.e. suffixes are matched from the end of the word.
 define stem as (
 	backwards (
 		// One attempt at the category-1 suffixes; `do` ignores a failure.
 		do remove_category_1
 			do (
 				// Per iteration: when check_category_2 matches, try remove_category_2
 				// (the inner `do` ignores its failure), then run remove_category_3.
 				// `repeat` ends the loop the first time remove_category_3 fails.
 				repeat (do (check_category_2 and remove_category_2) remove_category_3)
 			)
 	)
 )
--- a/contrib/snowball/algorithms/norwegian/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/norwegian/stem_ISO_8859_1.sbl
@@ -13,11 +13,11 @@ groupings ( v s_ending )

 stringescapes {}

 /* special characters (in ISO Latin I) */
 /* special characters */

 stringdef ae   hex 'E6'
 stringdef ao   hex 'E5'
 stringdef o/   hex 'F8'
 stringdef ae   '{U+00E6}'
 stringdef ao   '{U+00E5}'
 stringdef o/   '{U+00F8}'

 define v 'aeiouy{ae}{ao}{o/}'

--- a/contrib/snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
@@ -1,80 +0,0 @@
 routines (
           mark_regions
           main_suffix
           consonant_pair
           other_suffix
 )

 externals ( stem )

 integers ( p1 x )

 groupings ( v s_ending )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef ae   hex '91'
 stringdef ao   hex '86'
 stringdef o/   hex '9B'

 define v 'aeiouy{ae}{ao}{o/}'

 define s_ending  'bcdfghjlmnoprtvyz'

 define mark_regions as (

    $p1 = limit

    test ( hop 3 setmark x )
    goto v  gopast non-v  setmark p1
    try ( $p1 < x  $p1 = x )
 )

 backwardmode (

    define main_suffix as (
        setlimit tomark p1 for ([substring])
        among(

            'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
            'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
            'hetens' 'ers' 'ets' 'et' 'het' 'ast'
                (delete)
            's'
                (s_ending or ('k' non-v) delete)
            'erte' 'ert'
                (<-'er')
        )
    )

    define consonant_pair as (
        test (
            setlimit tomark p1 for ([substring])
            among(
                'dt' 'vt'
            )
        )
        next] delete
    )

    define other_suffix as (
        setlimit tomark p1 for ([substring])
        among(
            'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
            'hetslov'
                (delete)
        )
    )
 )

 define stem as (

    do mark_regions
    backwards (
        do main_suffix
        do consonant_pair
        do other_suffix
    )
 )
--- a/contrib/snowball/algorithms/porter/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/porter/stem_ISO_8859_1.sbl
--- a/contrib/snowball/algorithms/portuguese/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/portuguese/stem_ISO_8859_1.sbl
@@ -15,20 +15,20 @@ groupings ( v )

 stringescapes {}

 /* special characters (in ISO Latin I) */
 /* special characters */

 stringdef a'   hex 'E1'  // a-acute
 stringdef a^   hex 'E2'  // a-circumflex e.g. 'bota^nico
 stringdef e'   hex 'E9'  // e-acute
 stringdef e^   hex 'EA'  // e-circumflex
 stringdef i'   hex 'ED'  // i-acute
 stringdef o^   hex 'F4'  // o-circumflex
 stringdef o'   hex 'F3'  // o-acute
 stringdef u'   hex 'FA'  // u-acute
 stringdef c,   hex 'E7'  // c-cedilla
 stringdef a'   '{U+00E1}'  // a-acute
 stringdef a^   '{U+00E2}'  // a-circumflex e.g. 'bota^nico
 stringdef e'   '{U+00E9}'  // e-acute
 stringdef e^   '{U+00EA}'  // e-circumflex
 stringdef i'   '{U+00ED}'  // i-acute
 stringdef o^   '{U+00F4}'  // o-circumflex
 stringdef o'   '{U+00F3}'  // o-acute
 stringdef u'   '{U+00FA}'  // u-acute
 stringdef c,   '{U+00E7}'  // c-cedilla

 stringdef a~   hex 'E3'  // a-tilde
 stringdef o~   hex 'F5'  // o-tilde
 stringdef a~   '{U+00E3}'  // a-tilde
 stringdef o~   '{U+00F5}'  // o-tilde


 define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
@@ -92,12 +92,12 @@ backwardmode (
            (
                R2 delete
            )
            'log{i'}a'
            'log{i'}as'
            'logia'
            'logias'
            (
                R2 <- 'log'
            )
            'uci{o'}n' 'uciones'
            'u{c,}a~o' 'u{c,}o~es'
            (
                R2 <- 'u'
            )
--- a/contrib/snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
@@ -1,218 +0,0 @@
 routines (
           prelude postlude mark_regions
           RV R1 R2
           standard_suffix
           verb_suffix
           residual_suffix
           residual_form
 )

 externals ( stem )

 integers ( pV p1 p2 )

 groupings ( v )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef a'   hex 'A0'  // a-acute
 stringdef a^   hex '83'  // a-circumflex e.g. 'bota^nico
 stringdef e'   hex '82'  // e-acute
 stringdef e^   hex '88'  // e-circumflex
 stringdef i'   hex 'A1'  // i-acute
 stringdef o^   hex '93'  // o-circumflex
 stringdef o'   hex 'A2'  // o-acute
 stringdef u'   hex 'A3'  // u-acute
 stringdef c,   hex '87'  // c-cedilla

 stringdef a~   hex 'C6'  // a-tilde
 stringdef o~   hex 'E4'  // o-tilde


 define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'

 define prelude as repeat (
    [substring] among(
        '{a~}' (<- 'a~')
        '{o~}' (<- 'o~')
        ''     (next)
    ) //or next
 )

 define mark_regions as (

    $pV = limit
    $p1 = limit
    $p2 = limit  // defaults

    do (
        ( v (non-v gopast v) or (v gopast non-v) )
        or
        ( non-v (non-v gopast v) or (v next) )
        setmark pV
    )
    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
 )

 define postlude as repeat (
    [substring] among(
        'a~' (<- '{a~}')
        'o~' (<- '{o~}')
        ''   (next)
    ) //or next
 )

 backwardmode (

    define RV as $pV <= cursor
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    define standard_suffix as (
        [substring] among(

            'eza' 'ezas'
            'ico' 'ica' 'icos' 'icas'
            'ismo' 'ismos'
            '{a'}vel'
            '{i'}vel'
            'ista' 'istas'
            'oso' 'osa' 'osos' 'osas'
            'amento' 'amentos'
            'imento' 'imentos'

           'adora' 'ador' 'a{c,}a~o'
           'adoras' 'adores' 'a{c,}o~es'  // no -ic test
           'ante' 'antes' '{a^}ncia' // Note 1
            (
                R2 delete
            )
            'log{i'}a'
            'log{i'}as'
            (
                R2 <- 'log'
            )
            'uci{o'}n' 'uciones'
            (
                R2 <- 'u'
            )
            '{e^}ncia' '{e^}ncias'
            (
                R2 <- 'ente'
            )
            'amente'
            (
                R1 delete
                try (
                    [substring] R2 delete among(
                        'iv' (['at'] R2 delete)
                        'os'
                        'ic'
                        'ad'
                    )
                )
            )
            'mente'
            (
                R2 delete
                try (
                    [substring] among(
                        'ante' // Note 1
                        'avel'
                        '{i'}vel' (R2 delete)
                    )
                )
            )
            'idade'
            'idades'
            (
                R2 delete
                try (
                    [substring] among(
                        'abil'
                        'ic'
                        'iv'   (R2 delete)
                    )
                )
            )
            'iva' 'ivo'
            'ivas' 'ivos'
            (
                R2 delete
                try (
                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
                )
            )
            'ira' 'iras'
            (
                RV 'e'  // -eira -eiras usually non-verbal
                <- 'ir'
            )
        )
    )

    define verb_suffix as setlimit tomark pV for (
        [substring] among(
            'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
            'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
            'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
            'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
            'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
            'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
            'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
            'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
            'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
            'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
            '{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
            '{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
            '{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
            'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
            'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
            '{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'

            'ira' 'iras'
                (delete)
        )
    )

    define residual_suffix as (
        [substring] among(
            'os'
            'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
                ( RV delete )
        )
    )

    define residual_form as (
        [substring] among(
            'e' '{e'}' '{e^}'
                ( RV delete [('u'] test 'g') or
                             ('i'] test 'c') RV delete )
            '{c,}' (<-'c')
        )
    )
 )

 define stem as (
    do prelude
    do mark_regions
    backwards (
        do (
            ( ( standard_suffix or verb_suffix )
              and do ( ['i'] test 'c' RV delete )
            )
            or residual_suffix
        )
        do residual_form
    )
    do postlude
 )

 /*
    Note 1: additions of 15 Jun 2005
 */
--- a/contrib/snowball/algorithms/romanian/stem_ISO_8859_2.sbl
+++ b/contrib/snowball/algorithms/romanian/stem_ISO_8859_2.sbl
@@ -20,11 +20,11 @@ stringescapes {}

 /* special characters */

 stringdef a^   hex 'E2'  // a circumflex
 stringdef i^   hex 'EE'  // i circumflex
 stringdef a+   hex 'E3'  // a breve
 stringdef s,   hex 'BA'  // s cedilla
 stringdef t,   hex 'FE'  // t cedilla
 stringdef a^   '{U+00E2}'  // a circumflex
 stringdef i^   '{U+00EE}'  // i circumflex
 stringdef a+   '{U+0103}'  // a breve
 stringdef s,   '{U+015F}'  // s cedilla
 stringdef t,   '{U+0163}'  // t cedilla

 define v 'aeiou{a^}{i^}{a+}'

--- a/contrib/snowball/algorithms/romanian/stem_Unicode.sbl
+++ b/contrib/snowball/algorithms/romanian/stem_Unicode.sbl
@@ -1,236 +0,0 @@

 routines (
           prelude postlude mark_regions
           RV R1 R2
           step_0
           standard_suffix combo_suffix
           verb_suffix
           vowel_suffix
 )

 externals ( stem )

 integers ( pV p1 p2 )

 groupings ( v )

 booleans  ( standard_suffix_removed )

 stringescapes {}

 /* special characters */

 stringdef a^   hex '0E2'  // a circumflex
 stringdef i^   hex '0EE'  // i circumflex
 stringdef a+   hex '103'  // a breve
 stringdef s,   hex '15F'  // s cedilla
 stringdef t,   hex '163'  // t cedilla

 define v 'aeiou{a^}{i^}{a+}'

 define prelude as (
    repeat goto (
        v [ ('u' ] v <- 'U') or
            ('i' ] v <- 'I')
    )
 )

 define mark_regions as (

    $pV = limit
    $p1 = limit
    $p2 = limit // defaults

    do (
        ( v (non-v gopast v) or (v gopast non-v) )
        or
        ( non-v (non-v gopast v) or (v next) )
        setmark pV
    )
    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
 )

 define postlude as repeat (

    [substring] among(
        'I'  (<- 'i')
        'U'  (<- 'u')
        ''   (next)
    )

 )

 backwardmode (

    define RV as $pV <= cursor
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    define step_0 as (
        [substring] R1 among(
            'ul' 'ului'
                ( delete )
            'aua'
                ( <-'a' )
            'ea' 'ele' 'elor'
                ( <-'e' )
            'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
                ( <-'i')
            'ile'
                ( not 'ab' <- 'i' )
            'atei'
                ( <- 'at' )
            'a{t,}ie' 'a{t,}ia'
                ( <- 'a{t,}i' )
        )
    )

    define combo_suffix as test (
        [substring] R1 (
            among(
            /* 'IST'. alternative: include the following
                'alism' 'alisme'
                'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
                    <- 'al'
                )
            */
                'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
                    <- 'abil'
                )
                'ibilitate' (
                    <- 'ibil'
                )
                'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
                    <- 'iv'
                )
                'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
                'icator' 'icatori'
                'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
                'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
                    <- 'ic'
                )
                'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
                'atoare' 'ator' 'atori'
                '{a+}toare' '{a+}tor' '{a+}tori' (
                    <- 'at'
                )
                'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
                'itoare' 'itor' 'itori' (
                    <- 'it'
                )
            )
            set standard_suffix_removed
        )
    )

    define standard_suffix as (
        unset standard_suffix_removed
        repeat combo_suffix
        [substring] R2 (
            among(

                // past participle is treated here, rather than
                // as a verb ending:
                'at' 'ata' 'at{a+}' 'ati' 'ate'
                'ut' 'uta' 'ut{a+}' 'uti' 'ute'
                'it' 'ita' 'it{a+}' 'iti' 'ite'

                'ic' 'ica' 'ice' 'ici' 'ic{a+}'
                'abil' 'abila' 'abile' 'abili' 'abil{a+}'
                'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
                'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
                'ant' 'anta' 'ante' 'anti' 'ant{a+}'
                'ator' 'atori'
                'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
                'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
                    delete
                )
                'iune' 'iuni' (
                    '{t,}'] <- 't'
                )
                'ism' 'isme'
                'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
                    <- 'ist'
                    /* 'IST'. alternative: remove with <- '' */
                )
            )
            set standard_suffix_removed
        )
    )

    define verb_suffix as setlimit tomark pV for (
        [substring] among(
            // 'long' infinitive:
            'are' 'ere' 'ire' '{a^}re'

            // gerund:
            'ind' '{a^}nd'
            'indu' '{a^}ndu'

            'eze'
            'easc{a+}'
            // present:
            'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
            'e{s,}te'
            '{a+}sc' '{a+}{s,}ti'
            '{a+}{s,}te'

            // imperfect:
            'am' 'ai' 'au'
            'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
            'iam' 'iai' 'ia' 'ia{t,}i' 'iau'

            // past: // (not 'ii')
            'ui'
            'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
            'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
            'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
            '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'

            // pluferfect:
            'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
            'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
            '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
            '{a^}ser{a+}'
            'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'

                ( non-v or 'u'  delete )

            // present:
            '{a+}m' 'a{t,}i'
            'em' 'e{t,}i'
            'im' 'i{t,}i'
            '{a^}m' '{a^}{t,}i'

            // past:
            'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
            'sei' 'se'

            // pluperfect:
            'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
                (delete)
        )
    )

    define vowel_suffix as (
        [substring] RV among (
            'a' 'e' 'i' 'ie' '{a+}' ( delete )
        )
    )
 )

 define stem as (
    do prelude
    do mark_regions
    backwards (
        do step_0
        do standard_suffix
        do ( standard_suffix_removed or verb_suffix )
        do vowel_suffix
    )
    do postlude
 )

--- a/contrib/snowball/algorithms/russian/stem_KOI8_R.sbl
+++ b/contrib/snowball/algorithms/russian/stem_KOI8_R.sbl
@@ -1,41 +1,41 @@
 stringescapes {}

 /* the 32 Cyrillic letters in the KOI8-R coding scheme, and represented
   in Latin characters following the conventions of the standard Library
   of Congress transliteration: */

 stringdef a    hex 'C1'
 stringdef b    hex 'C2'
 stringdef v    hex 'D7'
 stringdef g    hex 'C7'
 stringdef d    hex 'C4'
 stringdef e    hex 'C5'
 stringdef zh   hex 'D6'
 stringdef z    hex 'DA'
 stringdef i    hex 'C9'
 stringdef i`   hex 'CA'
 stringdef k    hex 'CB'
 stringdef l    hex 'CC'
 stringdef m    hex 'CD'
 stringdef n    hex 'CE'
 stringdef o    hex 'CF'
 stringdef p    hex 'D0'
 stringdef r    hex 'D2'
 stringdef s    hex 'D3'
 stringdef t    hex 'D4'
 stringdef u    hex 'D5'
 stringdef f    hex 'C6'
 stringdef kh   hex 'C8'
 stringdef ts   hex 'C3'
 stringdef ch   hex 'DE'
 stringdef sh   hex 'DB'
 stringdef shch hex 'DD'
 stringdef "    hex 'DF'
 stringdef y    hex 'D9'
 stringdef '    hex 'D8'
 stringdef e`   hex 'DC'
 stringdef iu   hex 'C0'
 stringdef ia   hex 'D1'
 /* the 33 Cyrillic letters represented in ASCII characters following the
 * conventions of the standard Library of Congress transliteration: */

 stringdef a    '{U+0430}'
 stringdef b    '{U+0431}'
 stringdef v    '{U+0432}'
 stringdef g    '{U+0433}'
 stringdef d    '{U+0434}'
 stringdef e    '{U+0435}'
 stringdef e"   '{U+0451}'
 stringdef zh   '{U+0436}'
 stringdef z    '{U+0437}'
 stringdef i    '{U+0438}'
 stringdef i`   '{U+0439}'
 stringdef k    '{U+043A}'
 stringdef l    '{U+043B}'
 stringdef m    '{U+043C}'
 stringdef n    '{U+043D}'
 stringdef o    '{U+043E}'
 stringdef p    '{U+043F}'
 stringdef r    '{U+0440}'
 stringdef s    '{U+0441}'
 stringdef t    '{U+0442}'
 stringdef u    '{U+0443}'
 stringdef f    '{U+0444}'
 stringdef kh   '{U+0445}'
 stringdef ts   '{U+0446}'
 stringdef ch   '{U+0447}'
 stringdef sh   '{U+0448}'
 stringdef shch '{U+0449}'
 stringdef "    '{U+044A}'
 stringdef y    '{U+044B}'
 stringdef '    '{U+044C}'
 stringdef e`   '{U+044D}'
 stringdef iu   '{U+044E}'
 stringdef ia   '{U+044F}'

 routines ( mark_regions R2
           perfective_gerund
@@ -200,6 +200,10 @@ backwardmode (

 define stem as (

    // Normalise {e"} to {e}.  The documentation has long suggested the user
    // should do this before calling the stemmer - we now do it for them.
    do repeat ( goto (['{e"}']) <- '{e}' )

    do mark_regions
    backwards setlimit tomark pV for (
        do (
--- a/contrib/snowball/algorithms/russian/stem_Unicode.sbl
+++ b/contrib/snowball/algorithms/russian/stem_Unicode.sbl
@@ -1,215 +0,0 @@
 stringescapes {}

 /* the 32 Cyrillic letters in Unicode */

 stringdef a    hex '430'
 stringdef b    hex '431'
 stringdef v    hex '432'
 stringdef g    hex '433'
 stringdef d    hex '434'
 stringdef e    hex '435'
 stringdef zh   hex '436'
 stringdef z    hex '437'
 stringdef i    hex '438'
 stringdef i`   hex '439'
 stringdef k    hex '43A'
 stringdef l    hex '43B'
 stringdef m    hex '43C'
 stringdef n    hex '43D'
 stringdef o    hex '43E'
 stringdef p    hex '43F'
 stringdef r    hex '440'
 stringdef s    hex '441'
 stringdef t    hex '442'
 stringdef u    hex '443'
 stringdef f    hex '444'
 stringdef kh   hex '445'
 stringdef ts   hex '446'
 stringdef ch   hex '447'
 stringdef sh   hex '448'
 stringdef shch hex '449'
 stringdef "    hex '44A'
 stringdef y    hex '44B'
 stringdef '    hex '44C'
 stringdef e`   hex '44D'
 stringdef iu   hex '44E'
 stringdef ia   hex '44F'

 routines ( mark_regions R2
           perfective_gerund
           adjective
           adjectival
           reflexive
           verb
           noun
           derivational
           tidy_up
 )

 externals ( stem )

 integers ( pV p2 )

 groupings ( v )

 define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'

 define mark_regions as (

    $pV = limit
    $p2 = limit
    do (
        gopast v  setmark pV  gopast non-v
        gopast v  gopast non-v  setmark p2
       )
 )

 backwardmode (

    define R2 as $p2 <= cursor

    define perfective_gerund as (
        [substring] among (
            '{v}'
            '{v}{sh}{i}'
            '{v}{sh}{i}{s}{'}'
                ('{a}' or '{ia}' delete)
            '{i}{v}'
            '{i}{v}{sh}{i}'
            '{i}{v}{sh}{i}{s}{'}'
            '{y}{v}'
            '{y}{v}{sh}{i}'
            '{y}{v}{sh}{i}{s}{'}'
                (delete)
        )
    )

    define adjective as (
        [substring] among (
            '{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
            '{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
            '{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
            '{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
            '{ia}{ia}'
                        // and -
            '{o}{iu}'   // - which is somewhat archaic
            '{e}{iu}'   // - soft form of {o}{iu}
                (delete)
        )
    )

    define adjectival as (
        adjective

        /* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
           nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
           errors. Removing im, uem, enn creates too many errors.
        */

        try (
            [substring] among (
                '{e}{m}'                  // present passive participle
                '{n}{n}'                  // adjective from past passive participle
                '{v}{sh}'                 // past active participle
                '{iu}{shch}' '{shch}'     // present active participle
                    ('{a}' or '{ia}' delete)

     //but not  '{i}{m}' '{u}{e}{m}'      // present passive participle
     //or       '{e}{n}{n}'               // adjective from past passive participle

                '{i}{v}{sh}' '{y}{v}{sh}'// past active participle
                '{u}{iu}{shch}'          // present active participle
                    (delete)
            )
        )

    )

    define reflexive as (
        [substring] among (
            '{s}{ia}'
            '{s}{'}'
                (delete)
        )
    )

    define verb as (
        [substring] among (
            '{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
            '{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
            '{n}{y}' '{t}{'}' '{e}{sh}{'}'

            '{n}{n}{o}'
                ('{a}' or '{ia}' delete)

            '{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
            '{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
            '{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
            '{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
            '{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
            '{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
                (delete)
            /* note the short passive participle tests:
               '{n}{a}' '{n}' '{n}{o}' '{n}{y}'
               '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
            */
        )
    )

    define noun as (
        [substring] among (
            '{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
            '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
            '{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
            '{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
            '{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
            '{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
                (delete)
            /* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
               '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{x}'
               omitted - they only occur on 12 words.
            */
        )
    )

    define derivational as (
        [substring] R2 among (
            '{o}{s}{t}'
            '{o}{s}{t}{'}'
                (delete)
        )
    )

    define tidy_up as (
        [substring] among (

            '{e}{i`}{sh}'
            '{e}{i`}{sh}{e}'  // superlative forms
               (delete
                ['{n}'] '{n}' delete
               )
            '{n}'
               ('{n}' delete) // e.g. -nno endings
            '{'}'
               (delete)  // with some slight false conflations
        )
    )
 )

 define stem as (

    do mark_regions
    backwards setlimit tomark pV for (
        do (
             perfective_gerund or
             ( try reflexive
               adjectival or verb or noun
             )
        )
        try([ '{i}' ] delete)
        // because noun ending -i{iu} is being treated as verb ending -{iu}

        do derivational
        do tidy_up
    )
 )
--- a/contrib/snowball/algorithms/serbian.sbl
+++ b/contrib/snowball/algorithms/serbian.sbl
--- a/contrib/snowball/algorithms/spanish/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/spanish/stem_ISO_8859_1.sbl
@@ -16,15 +16,15 @@ groupings ( v )

 stringescapes {}

 /* special characters (in ISO Latin I) */

 stringdef a'   hex 'E1'  // a-acute
 stringdef e'   hex 'E9'  // e-acute
 stringdef i'   hex 'ED'  // i-acute
 stringdef o'   hex 'F3'  // o-acute
 stringdef u'   hex 'FA'  // u-acute
 stringdef u"   hex 'FC'  // u-diaeresis
 stringdef n~   hex 'F1'  // n-tilde
 /* special characters */

 stringdef a'   '{U+00E1}'  // a-acute
 stringdef e'   '{U+00E9}'  // e-acute
 stringdef i'   '{U+00ED}'  // i-acute
 stringdef o'   '{U+00F3}'  // o-acute
 stringdef u'   '{U+00FA}'  // u-acute
 stringdef u"   '{U+00FC}'  // u-diaeresis
 stringdef n~   '{U+00F1}'  // n-tilde

 define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'

--- a/contrib/snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
@@ -1,230 +0,0 @@
 routines (
           postlude mark_regions
           RV R1 R2
           attached_pronoun
           standard_suffix
           y_verb_suffix
           verb_suffix
           residual_suffix
 )

 externals ( stem )

 integers ( pV p1 p2 )

 groupings ( v )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef a'   hex 'A0'  // a-acute
 stringdef e'   hex '82'  // e-acute
 stringdef i'   hex 'A1'  // i-acute
 stringdef o'   hex 'A2'  // o-acute
 stringdef u'   hex 'A3'  // u-acute
 stringdef u"   hex '81'  // u-diaeresis
 stringdef n~   hex 'A4'  // n-tilde

 define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'

 // mark_regions: compute the three marks pV, p1 and p2 used by the region
 // tests RV, R1 and R2. All three default to the end of the string (limit)
 // and keep that value when the corresponding scan below fails.
 define mark_regions as (

    $pV = limit
    $p1 = limit
    $p2 = limit  // defaults

    // pV depends on the first two letters:
    //   vowel, non-vowel     -> position after the next vowel
    //   vowel, vowel         -> position after the next non-vowel
    //   non-vowel, non-vowel -> position after the next vowel
    //   non-vowel, vowel     -> one character further on
    do (
        ( v (non-v gopast v) or (v gopast non-v) )
        or
        ( non-v (non-v gopast v) or (v next) )
        setmark pV
    )
    // p1: position after the first non-vowel that follows a vowel;
    // p2: the same scan repeated starting from p1.
    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
 )

 // postlude: walk through the word and replace each acute-accented vowel
 // by its unaccented form. The empty string entry matches everywhere and
 // simply advances one character, so the repeat ends at the end of the word.
 define postlude as repeat (
    [substring] among(
        '{a'}' (<- 'a')
        '{e'}' (<- 'e')
        '{i'}' (<- 'i')
        '{o'}' (<- 'o')
        '{u'}' (<- 'u')
        // and possibly {u"}->u here, or in prelude
        ''     (next)
    ) //or next
 )

 backwardmode (

    // Region tests: each succeeds when the cursor is at or beyond the
    // corresponding mark (pV, p1, p2) set by mark_regions.
    define RV as $pV <= cursor
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    // attached_pronoun: remove a pronoun attached to the end of the word.
    // The first among brackets the longest pronoun found at the end of the
    // word. The second among then requires one of the listed verb endings
    // immediately before it, located in RV:
    //   - accented endings: the slice is extended over the ending and the
    //     whole slice (ending + pronoun) is replaced by the unaccented ending;
    //   - unaccented endings: only the pronoun is deleted;
    //   - 'yendo': the pronoun is deleted only when 'u' precedes 'yendo'.
    define attached_pronoun as (
        [substring] among(
            'me' 'se'  'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
            'las' 'les' 'los' 'nos'
        )
        substring RV among(
            'i{e'}ndo' (] <- 'iendo')
            '{a'}ndo'  (] <- 'ando')
            '{a'}r'    (] <- 'ar')
            '{e'}r'    (] <- 'er')
            '{i'}r'    (] <- 'ir')
            'ando'
            'iendo'
            'ar' 'er' 'ir'
                       (delete)
            'yendo'    ('u' delete)
        )
    )

    // standard_suffix: find the longest of the listed suffixes at the end of
    // the word and apply the action of its group. Every action starts with a
    // region test (R2, or R1 for 'amente'); when that test fails the routine
    // signals f and nothing is changed.
    define standard_suffix as (
        [substring] among(

            // plain deletion in R2
            'anza' 'anzas'
            'ico' 'ica' 'icos' 'icas'
            'ismo' 'ismos'
            'able' 'ables'
            'ible' 'ibles'
            'ista' 'istas'
            'oso' 'osa' 'osos' 'osas'
            'amiento' 'amientos'
            'imiento' 'imientos'
            (
                R2 delete
            )
            // deletion in R2, then also a preceding 'ic' if that is in R2
            'adora' 'ador' 'aci{o'}n'
            'adoras' 'adores' 'aciones'
            'ante' 'antes' 'ancia' 'ancias'// Note 1
            (
                R2 delete
                try ( ['ic'] R2 delete )
            )
            // replacements in R2
            'log{i'}a'
            'log{i'}as'
            (
                R2 <- 'log'
            )
            'uci{o'}n' 'uciones'
            (
                R2 <- 'u'
            )
            'encia' 'encias'
            (
                R2 <- 'ente'
            )
            // deletion in R1, then an optional further suffix in R2
            // ('iv' additionally takes a preceding 'at' in R2 with it)
            'amente'
            (
                R1 delete
                try (
                    [substring] R2 delete among(
                        'iv' (['at'] R2 delete)
                        'os'
                        'ic'
                        'ad'
                    )
                )
            )
            // deletion in R2, then an optional further suffix in R2
            'mente'
            (
                R2 delete
                try (
                    [substring] among(
                        'ante' // Note 1
                        'able'
                        'ible' (R2 delete)
                    )
                )
            )
            'idad'
            'idades'
            (
                R2 delete
                try (
                    [substring] among(
                        'abil'
                        'ic'
                        'iv'   (R2 delete)
                    )
                )
            )
            'iva' 'ivo'
            'ivas' 'ivos'
            (
                R2 delete
                try (
                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
                )
            )
        )
    )

    // y_verb_suffix: look for the longest listed y-suffix, with the search
    // limited by the mark pV, and delete it only when it is preceded by 'u'.
    define y_verb_suffix as (
        setlimit tomark pV for ([substring]) among(
            'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
            'yas' 'yes' 'yais' 'yamos'
                ('u' delete)
        )
    )

    // verb_suffix: look for the longest listed verb suffix, with the search
    // limited by the mark pV, and delete it.
    // For the first group ('en' 'es' '{e'}is' 'emos') a 'u' directly before
    // the suffix is deleted as well when that 'u' is itself preceded by 'g'.
    define verb_suffix as (
        setlimit tomark pV for ([substring]) among(

            'en' 'es' '{e'}is' 'emos'
                (try ('u' test 'g') ] delete)

            'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
            'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
            'ar{e'}'
            'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
            'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
            'er{e'}'
            'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
            'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
            'ir{e'}'

            'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
            'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
            'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
            'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
            'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
            'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
            'ierais'  'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
            'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
            '{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
                (delete)
        )
    )

    // residual_suffix: delete a remaining final 'os' or vowel when it lies
    // in RV. After deleting 'e' / '{e'}', a preceding 'u' is deleted too if
    // it is in RV and is itself preceded by 'g'.
    define residual_suffix as (
        [substring] among(
            'os'
            'a' 'o' '{a'}' '{i'}' '{o'}'
                ( RV delete )
            'e' '{e'}'
                ( RV delete try( ['u'] test 'g' RV delete ) )
        )
    )
 )

 // stem: entry point of the stemmer.
 // Marks the regions, then works backwards from the end of the word:
 // attached pronoun first, then the first applicable of the three suffix
 // routines, then the residual suffix. Finally postlude is applied in the
 // forward direction. Each step is wrapped in do, so a failing step leaves
 // the word unchanged and the following steps still run.
 define stem as (
    do mark_regions
    backwards (
        do attached_pronoun
        do ( standard_suffix or y_verb_suffix or verb_suffix )
        do residual_suffix
    )
    do postlude
 )

 /*
    Note 1: additions of 15 Jun 2005
 */
--- a/contrib/snowball/algorithms/swedish/stem_ISO_8859_1.sbl
+++ b/contrib/snowball/algorithms/swedish/stem_ISO_8859_1.sbl
@@ -13,11 +13,11 @@ groupings ( v s_ending )

 stringescapes {}

 /* special characters (in ISO Latin I) */
 /* special characters */

 stringdef a"   hex 'E4'
 stringdef ao   hex 'E5'
 stringdef o"   hex 'F6'
 stringdef a"   '{U+00E4}'
 stringdef ao   '{U+00E5}'
 stringdef o"   '{U+00F6}'

 define v 'aeiouy{a"}{ao}{o"}'

--- a/contrib/snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
+++ b/contrib/snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
@@ -1,72 +0,0 @@
 routines (
           mark_regions
           main_suffix
           consonant_pair
           other_suffix
 )

 externals ( stem )

 integers ( p1 x )

 groupings ( v s_ending )

 stringescapes {}

 /* special characters (in MS-DOS Latin I) */

 stringdef a"   hex '84'
 stringdef ao   hex '86'
 stringdef o"   hex '94'

 define v 'aeiouy{a"}{ao}{o"}'

 define s_ending  'bcdfghjklmnoprtvy'

 // mark_regions: compute the mark p1.
 // x is set three characters into the word (the word is left untouched and
 // the routine signals f if it is shorter than that). p1 is the position
 // after the first non-vowel that follows a vowel, but never less than x.
 // If no such position exists the routine signals f and p1 stays at limit.
 define mark_regions as (

    $p1 = limit
    test ( hop 3 setmark x )
    goto v gopast non-v  setmark p1
    try ( $p1 < x  $p1 = x )
 )

 backwardmode (

    // main_suffix: look for the longest listed suffix, with the search
    // limited by the mark p1. All suffixes are simply deleted except 's',
    // which is deleted only when preceded by a letter of the s_ending group.
    define main_suffix as (
        setlimit tomark p1 for ([substring])
        among(

            'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
            'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
            'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
            'hetens' 'erns' 'at' 'andet' 'het' 'ast'
                (delete)
            's'
                (s_ending delete)
        )
    )

    // consonant_pair: if, within the p1 limit, the word ends in one of the
    // listed consonant pairs, delete its last character. `and` restores the
    // cursor to the end of the word before the single-character slice is taken.
    define consonant_pair as setlimit tomark p1 for (
        among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
        and ([next] delete)
    )

    // other_suffix: within the p1 limit, delete 'lig' / 'ig' / 'els', or
    // drop the final 't' of 'l{o"}st' and 'fullt' by replacement.
    define other_suffix as setlimit tomark p1 for (
        [substring] among(
            'lig' 'ig' 'els' (delete)
            'l{o"}st'        (<-'l{o"}s')
            'fullt'          (<-'full')
        )
    )
 )

 // stem: entry point of the stemmer.
 // Marks the region, then applies the three suffix steps in order while
 // working backwards from the end of the word. Each step is wrapped in do,
 // so a step that does not apply never prevents the following ones.
 define stem as (
    do mark_regions
    backwards (
        do main_suffix
        do consonant_pair
        do other_suffix
    )
 )
--- a/contrib/snowball/algorithms/tamil.sbl
+++ b/contrib/snowball/algorithms/tamil.sbl
@@ -0,0 +1,405 @@
 /*
 * Affix stripping stemming algorithm for Tamil
 * By Damodharan Rajalingam
 */

 stringescapes {}

 /* Aytham */
 stringdef aytham   '{U+0B83}'

 /* Uyir - independent vowels */
 stringdef a        '{U+0B85}'
 stringdef aa       '{U+0B86}'
 stringdef i        '{U+0B87}'
 stringdef ii       '{U+0B88}'
 stringdef u        '{U+0B89}'
 stringdef uu       '{U+0B8A}'
 stringdef e        '{U+0B8E}'
 stringdef ee       '{U+0B8F}'
 stringdef ai       '{U+0B90}'
 stringdef o        '{U+0B92}'
 stringdef oo       '{U+0B93}'
 stringdef au       '{U+0B94}'

 /* Consonants */
 stringdef ka       '{U+0B95}'
 stringdef nga      '{U+0B99}'
 stringdef ca       '{U+0B9A}'
 stringdef ja       '{U+0B9C}'
 stringdef nya      '{U+0B9E}'
 stringdef tta      '{U+0B9F}'
 stringdef nna      '{U+0BA3}'
 stringdef ta       '{U+0BA4}'
 stringdef tha      '{U+0BA4}'
 stringdef na       '{U+0BA8}'
 stringdef nnna     '{U+0BA9}'
 stringdef pa       '{U+0BAA}'
 stringdef ma       '{U+0BAE}'
 stringdef ya       '{U+0BAF}'
 stringdef ra       '{U+0BB0}'
 stringdef rra      '{U+0BB1}'
 stringdef la       '{U+0BB2}'
 stringdef lla      '{U+0BB3}'
 stringdef llla     '{U+0BB4}'
 stringdef zha      '{U+0BB4}'
 stringdef va       '{U+0BB5}'

 /* Vatamozi - borrowed */
 stringdef sha      '{U+0BB6}'
 stringdef ssa      '{U+0BB7}'
 stringdef sa       '{U+0BB8}'
 stringdef ha       '{U+0BB9}'


 /* Dependent vowel signs (kombu etc.) */
 stringdef vs_aa    '{U+0BBE}'
 stringdef vs_i     '{U+0BBF}'
 stringdef vs_ii    '{U+0BC0}'
 stringdef vs_u     '{U+0BC1}'
 stringdef vs_uu    '{U+0BC2}'
 stringdef vs_e     '{U+0BC6}'
 stringdef vs_ee    '{U+0BC7}'
 stringdef vs_ai    '{U+0BC8}'
 stringdef vs_o     '{U+0BCA}'
 stringdef vs_oo    '{U+0BCB}'
 stringdef vs_au    '{U+0BCC}'

 /* Pulli */
 stringdef pulli    '{U+0BCD}'

 /* AU length mark */
 stringdef au_lmark '{U+0BD7}'


 routines (
 remove_plural_suffix
 remove_question_suffixes
 remove_question_prefixes
 remove_pronoun_prefixes
 remove_command_suffixes
 remove_um
 remove_vetrumai_urupukal
 fix_va_start
 fix_ending
 fix_endings
 remove_tense_suffix
 remove_tense_suffixes
 remove_common_word_endings
 has_min_length
 )

 externals ( stem )

 booleans (
 found_a_match
 found_vetrumai_urupu
 )

 // has_min_length: signals t only when the word is longer than 4 characters.
 // The removal routines call it as a guard so short words are left alone.
 define has_min_length as (
 $(len > 4)
 )

 // fix_va_start: if the text at the cursor starts with {va} followed by one
 // of the vowel signs oo / o / u / uu, replace that pair by the matching
 // independent vowel. The alternatives are tried in the order written and
 // the routine signals f when none of them matches.
 // In each alternative the leading `try ... and` always succeeds and restores
 // the cursor, so the match is decided by the bracketed literal alone.
 define fix_va_start as (
 (try '{va}{vs_oo}' and [ '{va}{vs_oo}' ] <- '{oo}' ) or
 (try '{va}{vs_o}' and [ '{va}{vs_o}' ] <- '{o}' ) or
 (try '{va}{vs_u}' and [ '{va}{vs_u}' ] <- '{u}' ) or
 (try '{va}{vs_uu}' and [ '{va}{vs_uu}' ] <- '{uu}' )
 )

 // fix_endings: apply fix_ending repeatedly until it no longer succeeds.
 // Wrapped in do, so this routine itself always signals t.
 define fix_endings as (
 do repeat fix_ending
 )

 // remove_question_prefixes: delete a prefix made of {e}, one of the listed
 // consonants and a pulli, then let fix_va_start repair the new beginning.
 // Signals f (and changes nothing) when the prefix is not present.
 define remove_question_prefixes as (
 [
  '{e}'
  among('{ka}' '{ca}' '{tha}' '{va}' '{na}' '{pa}' '{ma}' '{ya}' '{nga}' '{nya}')
  '{pulli}'
 ]
 delete
 do fix_va_start
 )

 // Gives signal t if an ending was fixed, signal f otherwise.
 //
 // The word must be longer than 3 characters. Working backwards from the end
 // of the word, the alternatives below are tried in the order written; the
 // first one whose pattern matches either deletes the bracketed ending or
 // replaces it, and the remaining alternatives are skipped.
 define fix_ending as (
 $(len > 3)
 backwards (
  ( [among('{na}{pulli}' '{na}{pulli}{ta}' '{na}{pulli}{ta}{pulli}') ] delete )
  or
  // final {ya}{pulli} is dropped only after one of the vowel signs ai / i / ii
  ( ['{ya}{pulli}' test among('{vs_ai}' '{vs_i}' '{vs_ii}') ] delete )
  or
  ( [ '{tta}{pulli}{pa}{pulli}' or '{tta}{pulli}{ka}{pulli}' ] <- '{lla}{pulli}' )
  or
  ( [ '{nnna}{pulli}{rra}{pulli}' ] <- '{la}{pulli}' )
  or
 //		( [ '{rra}{pulli}{ka}{pulli}'  or '{nnna}{pulli}{nnna}{pulli}' ] <- '{la}{pulli}'  )
  ( [ '{rra}{pulli}{ka}{pulli}' ] <- '{la}{pulli}' )
  or
  ( [ '{tta}{pulli}{tta}{pulli}' ] <- '{tta}{vs_u}' )
  or
  // only considered when found_vetrumai_urupu is set (remove_vetrumai_urupukal
  // sets it after removing a suffix)
  ( found_vetrumai_urupu [ '{ta}{pulli}{ta}{pulli}' (test not '{vs_ai}') ] <- '{ma}{pulli}' )
  or
  ( [ '{vs_u}{ka}{pulli}' or '{vs_u}{ka}{pulli}{ka}{pulli}' ] <- '{pulli}' )
  or
  ( [ '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') ] delete )
  or
  ( [ '{pulli}' among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') ] delete )
  or
  ( [ '{pulli}' (among('{ya}' '{ra}' '{la}' '{va}' '{zha}' '{lla}') or among('{nga}' '{nya}' '{nna}' '{na}' '{ma}' '{nnna}')) '{pulli}' ] <- '{pulli}' )
  or
  ( [ among('{va}' '{ya}' '{va}{pulli}') ] delete )
  or
  ( [ '{nnna}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')) ] delete )
  or
  ( [ '{nga}{pulli}' (test not '{vs_ai}')] <- '{ma}{pulli}' )
  or
  ( [ '{nga}{pulli}' ] delete )
  or
  ( [ '{pulli}' (test (among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}') or '{pulli}')) ] delete )
  )
 )

 // remove_pronoun_prefixes: delete a prefix made of {a}, {i} or {u}, one of
 // the listed consonants and a pulli. On success found_a_match is set and
 // fix_va_start repairs the new beginning; otherwise the routine signals f
 // with found_a_match left unset.
 define remove_pronoun_prefixes as (
 unset found_a_match
 [
  among('{a}' '{i}' '{u}')
  among('{ka}' '{ca}' '{tha}' '{va}' '{na}' '{pa}' '{ma}' '{ya}' '{nga}' '{nya}')
  '{pulli}'
 ]
 delete
 set found_a_match
 do fix_va_start
 )

 // remove_plural_suffix: working backwards from the end of the word, remove
 // or rewrite an ending that finishes in {ka}{lla}{pulli}. The alternatives
 // are tried in the order written, the plain deletion being the last resort.
 // found_a_match is set only when one of them applied.
 define remove_plural_suffix as (
 unset found_a_match
 backwards (
  (
   ( [ '{vs_u}{nga}{pulli}{ka}{lla}{pulli}' (test not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}')) ] <- '{pulli}' )
   or
   ( [ '{rra}{pulli}{ka}{lla}{pulli}' ] <- '{la}{pulli}' )
   or
   ( [ '{tta}{pulli}{ka}{lla}{pulli}' ] <- '{lla}{pulli}' )
   or
   ( [ '{ka}{lla}{pulli}' ] delete )
  )
  set found_a_match
 )
 )

 // remove_question_suffixes: for words passing has_min_length, replace a
 // final vowel sign oo / ee / aa by a pulli (setting found_a_match), then
 // run fix_endings. The replacement is optional; fix_endings always runs.
 define remove_question_suffixes as (
 has_min_length
 unset found_a_match
 backwards do (
  [ among('{vs_oo}' '{vs_ee}' '{vs_aa}') ] <- '{pulli}'
  set found_a_match
 )
 do fix_endings
 )

 // remove_command_suffixes: for words passing has_min_length, delete a final
 // {pa}{vs_i} or {va}{vs_i} and set found_a_match. Signals f when neither
 // ending is present.
 define remove_command_suffixes as (
 has_min_length
 unset found_a_match
 backwards (
  [ among('{pa}{vs_i}' '{va}{vs_i}') ]
  delete
  set found_a_match
 )
 )

 // remove_um: for words passing has_min_length, replace a final
 // {vs_u}{ma}{pulli} by a pulli, set found_a_match and apply fix_ending once.
 // Signals f when the ending is not present.
 define remove_um as (
 unset found_a_match
 has_min_length
 backwards (
  [ '{vs_u}{ma}{pulli}' ] <- '{pulli}'
  set found_a_match
 )
 do fix_ending
 )

 // remove_common_word_endings: for words passing has_min_length, strip one
 // attached word from the end. Two groups are tried in order, working
 // backwards from the end of the word:
 //   1. endings that are replaced by a pulli (alternatives tried in the
 //      order written);
 //   2. endings that are deleted outright (longest match of the among).
 // found_a_match is set when a group applied; fix_endings then runs. The
 // routine signals f without running fix_endings when neither group matches.
 define remove_common_word_endings as (
 // These are not suffixes actually but are
 // some words that are attached to other words
 // but can be removed for stemming
 unset found_a_match
 has_min_length
 backwards (
  test ( [ '{vs_u}{tta}{nnna}{pulli}' or
     '{vs_i}{la}{pulli}{la}{vs_ai}' or
     '{vs_i}{tta}{ma}{pulli}' or
     '{vs_i}{nnna}{pulli}{rra}{vs_i}' or
     '{vs_aa}{ka}{vs_i}' or
     '{vs_aa}{ka}{vs_i}{ya}' or
     '{vs_e}{nnna}{pulli}{rra}{vs_u}' or
     '{vs_u}{lla}{pulli}{lla}' or
     '{vs_u}{tta}{vs_ai}{ya}' or
     '{vs_u}{tta}{vs_ai}' or
     '{vs_e}{nnna}{vs_u}{ma}{pulli}' or
     ('{la}{pulli}{la}' test (not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
     '{vs_e}{nnna}' or
     // NOTE(review): the next literal repeats an alternative listed earlier
     // in this chain, so it appears to be redundant.
     '{vs_aa}{ka}{vs_i}' ] <- '{pulli}'
     (set found_a_match)
     )
  or
  test ( [ among('{pa}{tta}{vs_u}'
     '{pa}{tta}{pulli}{tta}'
     '{pa}{tta}{pulli}{tta}{vs_u}'
     '{pa}{tta}{pulli}{tta}{ta}{vs_u}'
     '{pa}{tta}{pulli}{tta}{nna}'
     '{ka}{vs_u}{ra}{vs_i}{ya}'
     '{pa}{rra}{pulli}{rra}{vs_i}'
     '{va}{vs_i}{tta}{vs_u}'
     '{va}{vs_i}{tta}{pulli}{tta}{vs_u}'
     '{pa}{tta}{vs_i}{ta}{vs_aa}{nnna}'
     '{pa}{tta}{vs_i}'
     '{ta}{vs_aa}{nnna}'
     '{vs_e}{la}{pulli}{la}{vs_aa}{ma}{pulli}')
    ] delete
    (set found_a_match)
    )
 )
 do fix_endings
 )

 // remove_vetrumai_urupukal: for words passing has_min_length, remove or
 // rewrite one ending from the groups below, working backwards from the end
 // of the word. The groups are tried in the order written:
 //   1. {nnna}{vs_ai}                    -> deleted
 //   2. endings built on {vs_ai}         -> replaced by a pulli
 //   3. the long list of vowel-sign endings -> replaced by a pulli
 //   4. the list starting with {ka}{nna}{pulli} -> deleted
 //   5. {vs_ii}                          -> replaced by {vs_i}
 // When a group applied, found_a_match and found_vetrumai_urupu are set, a
 // remaining final {vs_i}{nnna}{pulli} is optionally replaced by a pulli, and
 // fix_endings runs. When no group matches the routine signals f with both
 // flags unset and fix_endings is not run.
 define remove_vetrumai_urupukal as (
 unset found_a_match
 unset found_vetrumai_urupu
 has_min_length
 backwards (
  (
    test ( ['{nnna}{vs_ai}'] delete )
   or
    test ([ ( '{vs_i}{nnna}{vs_ai}' or
     '{vs_ai}' (test not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}'))) or
      ( '{vs_ai}' (test (among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}')))
    ] <- '{pulli}'
    )
   or
   test ( [
            '{vs_o}{tta}{vs_u}' or
            '{vs_oo}{tta}{vs_u}' or
            '{vs_i}{la}{pulli}' or
            '{vs_i}{rra}{pulli}' or
            ('{vs_i}{nnna}{pulli}' (test not '{ma}')) or
            '{vs_i}{nnna}{pulli}{rra}{vs_u}' or
            '{vs_i}{ra}{vs_u}{na}{pulli}{ta}{vs_u}' or
            '{va}{vs_i}{tta}' or
            ($(len >= 7) '{vs_i}{tta}{ma}{pulli}') or
            '{vs_aa}{la}{pulli}' or
            '{vs_u}{tta}{vs_ai}' or
            '{vs_aa}{ma}{la}{pulli}' or
            ('{la}{pulli}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
            '{vs_u}{lla}{pulli}'
           ] <- '{pulli}'
     )
   or
   test ( [
           '{ka}{nna}{pulli}' or
           '{ma}{vs_u}{nnna}{pulli}' or
           '{ma}{vs_ee}{la}{pulli}' or
           '{ma}{vs_ee}{rra}{pulli}' or
           '{ka}{vs_ii}{llla}{pulli}' or
           '{pa}{vs_i}{nnna}{pulli}' or
           ('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')))
          ] delete
      )
   or
   test ([ '{vs_ii}' ] <- '{vs_i}')
  )
  (set found_a_match)
  (set found_vetrumai_urupu)
  do ( [ '{vs_i}{nnna}{pulli}' ] <- '{pulli}' )
 )
 do fix_endings
 )

 // remove_tense_suffixes: call remove_tense_suffix over and over for as long
 // as the previous call reported a match through found_a_match. The flag is
 // set up front so that the first pass always runs.
 define remove_tense_suffixes as (
 set found_a_match
 repeat (
  found_a_match
  do remove_tense_suffix
 )
 )

 // remove_tense_suffix: one pass of tense-suffix removal for words passing
 // has_min_length. Working backwards from the end of the word:
 //   - first, optionally, one of four groups is applied (tried in the order
 //     written): two groups that delete the matched ending, one that
 //     replaces it by a pulli, and one that deletes {ka}{vs_u} / {ta}{vs_u}
 //     when followed by the pulli test;
 //   - then, optionally, one of the endings in the final among is deleted.
 // found_a_match is set whenever something was changed, which is what keeps
 // the loop in remove_tense_suffixes going. fix_endings runs at the end.
 // NOTE(review): a few literals of the pulli-replacement group also occur in
 // the earlier deletion group, which is tried first -- confirm whether those
 // later entries can ever be reached.
 define remove_tense_suffix as (
 unset found_a_match
 has_min_length
 backwards (
  do (
   test ( [among(
           '{ka}{vs_o}{nna}{pulli}{tta}{vs_i}{ra}{pulli}'
           '{pa}{tta}{vs_u}'
           )] delete
     (set found_a_match)
     )
   or
   test ( [
            '{ma}{vs_aa}{ra}{pulli}' or
            '{ma}{vs_i}{nnna}{pulli}' or
            '{nnna}{nnna}{pulli}' or
            '{nnna}{vs_aa}{nnna}{pulli}' or
            '{nnna}{vs_aa}{lla}{pulli}' or
            '{nnna}{vs_aa}{ra}{pulli}' or
            ('{va}{nnna}{pulli}' test (not among('{a}' '{aa}' '{i}' '{ii}' '{u}' '{uu}' '{e}' '{ee}' '{ai}' '{o}' '{oo}' '{au}')) ) or
            '{nnna}{lla}{pulli}' or
            '{va}{lla}{pulli}' or
            '{nnna}{ra}{pulli}' or
            '{va}{ra}{pulli}' or
            '{nnna}' or '{pa}' or '{ka}' or '{ta}' or '{ya}' or
            '{pa}{nnna}{pulli}' or
            '{pa}{lla}{pulli}' or
            '{pa}{ra}{pulli}' or
            ('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
            '{vs_i}{rra}{pulli}{rra}{vs_u}' or
            '{pa}{ma}{pulli}' or
            '{nnna}{ma}{pulli}' or
            '{ta}{vs_u}{ma}{pulli}' or
            '{rra}{vs_u}{ma}{pulli}' or
            '{ka}{vs_u}{ma}{pulli}' or
            '{nnna}{vs_e}{nnna}{pulli}' or
            '{nnna}{vs_ai}' or
            '{va}{vs_ai}'
       ] delete
       (set found_a_match)
       )
   or
   test ( [
           ('{vs_aa}{nnna}{pulli}' test (not '{ca}')) or
           '{vs_aa}{lla}{pulli}' or
           '{vs_aa}{ra}{pulli}' or
           '{vs_ee}{nnna}{pulli}' or
           '{vs_aa}' or
           '{vs_aa}{ma}{pulli}' or
           '{vs_e}{ma}{pulli}' or
           '{vs_ee}{ma}{pulli}' or
           '{vs_oo}{ma}{pulli}' or
           '{ka}{vs_u}{ma}{pulli}' or
           '{ta}{vs_u}{ma}{pulli}' or
           '{tta}{vs_u}{ma}{pulli}' or
           '{rra}{vs_u}{ma}{pulli}' or
           '{vs_aa}{ya}{pulli}' or
           '{nnna}{vs_e}{nnna}{pulli}' or
           '{nnna}{vs_i}{ra}{pulli}' or
           '{vs_ii}{ra}{pulli}' or
           '{vs_ii}{ya}{ra}{pulli}'
          ] <- '{pulli}'
      (set found_a_match)
      )
   or
   test ( ([ '{ka}{vs_u}' or '{ta}{vs_u}' ) (test '{pulli}') ] delete
      (set found_a_match)
      )
  )
  do ([among(
              '{vs_aa}{na}{vs_i}{nnna}{pulli}{rra}'
              '{vs_aa}{na}{vs_i}{nnna}{pulli}{rra}{pulli}'
              '{ka}{vs_i}{nnna}{pulli}{rra}'
              '{ka}{vs_i}{nnna}{pulli}{rra}{pulli}'
              '{ka}{vs_i}{rra}'
              '{ka}{vs_i}{rra}{pulli}'
            )] delete
    (set found_a_match)
    )
 )
 do fix_endings
 )

 // stem: entry point of the stemmer.
 // fix_ending is applied once to every word. The has_min_length guard that
 // follows is not wrapped in do, so for words of 4 characters or fewer the
 // routine stops there and signals f. Longer words go through the prefix
 // and suffix removal steps in the order listed; each is wrapped in do, so a
 // step that does not apply never prevents the following ones.
 define stem as (
 unset found_vetrumai_urupu
 do fix_ending
 has_min_length
 do remove_question_prefixes
 do remove_pronoun_prefixes
 do remove_question_suffixes
 do remove_um
 do remove_common_word_endings
 do remove_vetrumai_urupukal
 do remove_plural_suffix
 do remove_command_suffixes
 do remove_tense_suffixes
 )
--- a/contrib/snowball/algorithms/turkish/stem_Unicode.sbl
+++ b/contrib/snowball/algorithms/turkish/stem_Unicode.sbl
@@ -2,7 +2,7 @@
 	* author: Evren (Kapusuz) Çilden
 	* email: evren.kapusuz at gmail.com
 	* version: 1.0 (15.01.2007)
 	


 	* stems nominal verb suffixes
 	* stems nominal inflections
@@ -10,13 +10,13 @@
 	* (y,n,s,U) context check
 	* vowel harmony check
 	* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)
 	

 	* The stemming algorithm is based on the paper "An Affix Stripping
 	* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
 	* Eşref Adalı (Proceedings of the IASTED International Conference
 	* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
 	* Innsbruck, Austria)
 	

 	* Turkish is an agglutinative language and has a very rich morphological
 	* structure. In Turkish, you can form many different words from a single stem
 	* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
@@ -59,14 +59,14 @@ routines (
 	mark_yken		// nominal verb suffix
 	mark_ymUs_		// nominal verb suffix
 	mark_ysA		// nominal verb suffix
 	

 	mark_suffix_with_optional_y_consonant
 	mark_suffix_with_optional_U_vowel
 	mark_suffix_with_optional_n_consonant
 	mark_suffix_with_optional_s_consonant
 	

 	more_than_one_syllable_word
 	

 	post_process_last_consonants
 	postlude

@@ -75,34 +75,32 @@ routines (
 	stem_suffix_chain_before_ki
 )

 /* Special characters in Unicode Latin-1 and Latin Extended-A */
 stringdef c.   	hex 'E7'	// LATIN SMALL LETTER C WITH CEDILLA
 stringdef g~   	hex '011F'	// LATIN SMALL LETTER G WITH BREVE
 stringdef i'   	hex '0131'	// LATIN SMALL LETTER I WITHOUT DOT
 stringdef o"  	hex 'F6'	// LATIN SMALL LETTER O WITH DIAERESIS
 stringdef s.	hex '015F'	// LATIN SMALL LETTER S WITH CEDILLA
 stringdef u"  	hex 'FC'	// LATIN SMALL LETTER U WITH DIAERESIS

 stringescapes 	{ }
 stringescapes	{ }

 integers 	( strlen )	// length of a string
 /* Special characters in Unicode Latin-1 and Latin Extended-A */
 stringdef c,	'{U+00E7}'	// LATIN SMALL LETTER C WITH CEDILLA
 stringdef g~	'{U+011F}'	// LATIN SMALL LETTER G WITH BREVE
 stringdef i'	'{U+0131}'	// LATIN SMALL LETTER I WITHOUT DOT
 stringdef o"	'{U+00F6}'	// LATIN SMALL LETTER O WITH DIAERESIS
 stringdef s,	'{U+015F}'	// LATIN SMALL LETTER S WITH CEDILLA
 stringdef u"	'{U+00FC}'	// LATIN SMALL LETTER U WITH DIAERESIS

 booleans	( continue_stemming_noun_suffixes )

 groupings 	( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)
 groupings	( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)

 define vowel 	'ae{i'}io{o"}u{u"}'
 define vowel	'ae{i'}io{o"}u{u"}'
 define U	'{i'}iu{u"}'

 // the vowel grouping definitions below are used for checking vowel harmony
 define vowel1  	'a{i'}ou' 		// vowels that can end with suffixes containing 'a'
 define vowel2  	'ei{o"}{u"}' 		// vowels that can end with suffixes containing 'e'
 define vowel3  	'a{i'}' 		// vowels that can end with suffixes containing 'i''
 define vowel4  	'ei'	 		// vowels that can end with suffixes containing 'i'
 define vowel5  	'ou'	 		// vowels that can end with suffixes containing 'o' or 'u'
 define vowel6  	'{o"}{u"}' 		// vowels that can end with suffixes containing 'o"' or 'u"'
 define vowel1	'a{i'}ou'		// vowels that can end with suffixes containing 'a'
 define vowel2	'ei{o"}{u"}'		// vowels that can end with suffixes containing 'e'
 define vowel3	'a{i'}'			// vowels that can end with suffixes containing 'i''
 define vowel4	'ei'			// vowels that can end with suffixes containing 'i'
 define vowel5	'ou'			// vowels that can end with suffixes containing 'o' or 'u'
 define vowel6	'{o"}{u"}'		// vowels that can end with suffixes containing 'o"' or 'u"'

 externals 	( stem )
 externals	( stem )

 backwardmode (
 	// checks vowel harmony for possible suffixes,
@@ -124,165 +122,165 @@ backwardmode (
 			)
 		)
 	)
 	

 	// if the last consonant before suffix is vowel and n then advance and delete
 	// if the last consonant before suffix is non vowel and n do nothing
 	// if the last consonant before suffix is not n then only delete the suffix
 	// assumption: slice beginning is set correctly
 	define mark_suffix_with_optional_n_consonant as (
 		((test 'n') next (test vowel))
 		('n' (test vowel))
 		or
 		((not(test 'n')) test(next (test vowel)))
 		((not(test 'n')) test(next vowel))

 	)
 	

 	// if the last consonant before suffix is vowel and s then advance and delete
 	// if the last consonant before suffix is non vowel and s do nothing
 	// if the last consonant before suffix is not s then only delete the suffix
 	// assumption: slice beginning is set correctly
 	define mark_suffix_with_optional_s_consonant as (
 		((test 's') next (test vowel))
 		('s' (test vowel))
 		or
 		((not(test 's')) test(next (test vowel)))
 		((not(test 's')) test(next vowel))
 	)
 	

 	// if the last consonant before suffix is vowel and y then advance and delete
 	// if the last consonant before suffix is non vowel and y do nothing
 	// if the last consonant before suffix is not y then only delete the suffix
 	// assumption: slice beginning is set correctly
 	define mark_suffix_with_optional_y_consonant as (
 		((test 'y') next (test vowel))
 		('y' (test vowel))
 		or
 		((not(test 'y')) test(next (test vowel)))
 		((not(test 'y')) test(next vowel))
 	)
 	

 	define mark_suffix_with_optional_U_vowel as (
 		((test U) next (test non-vowel))
 		(U (test non-vowel))
 		or
 		((not(test U)) test(next (test non-vowel)))
 		((not(test U)) test(next non-vowel))

 	)
 	

 	define mark_possessives as (
 		among ('m{i'}z' 'miz' 'muz' 'm{u"}z'
 		       'n{i'}z' 'niz' 'nuz' 'n{u"}z' 'm' 'n')
 		(mark_suffix_with_optional_U_vowel)
 	)
 	

 	define mark_sU as (
 		check_vowel_harmony
 		U
 		(mark_suffix_with_optional_s_consonant)
 	)
 	

 	define mark_lArI as (
 		among ('leri' 'lar{i'}')
 	)
 	

 	define mark_yU as (
 		check_vowel_harmony
 		U
 		(mark_suffix_with_optional_y_consonant)	
 		(mark_suffix_with_optional_y_consonant)
 	)
 	

 	define mark_nU as (
 		check_vowel_harmony
 		among ('n{i'}' 'ni' 'nu' 'n{u"}')	
 		among ('n{i'}' 'ni' 'nu' 'n{u"}')
 	)
 	

 	define mark_nUn as (
 		check_vowel_harmony
 		among ('{i'}n' 'in' 'un' '{u"}n')	
 		among ('{i'}n' 'in' 'un' '{u"}n')
 		(mark_suffix_with_optional_n_consonant)
 	)
 	

 	define mark_yA as (
 		check_vowel_harmony
 		among('a' 'e')
 		(mark_suffix_with_optional_y_consonant)
 	)
 	

 	define mark_nA as (
 		check_vowel_harmony
 		among('na' 'ne')
 	)
 	

 	define mark_DA as (
 		check_vowel_harmony
 		among('da' 'de' 'ta' 'te')
 	)
 	

 	define mark_ndA as (
 		check_vowel_harmony
 		among('nda' 'nde')
 	)
 	

 	define mark_DAn as (
 		check_vowel_harmony
 		among('dan' 'den' 'tan' 'ten')
 	)
 	

 	define mark_ndAn as (
 		check_vowel_harmony
 		among('ndan' 'nden')
 	)
 	

 	define mark_ylA as (
 		check_vowel_harmony
 		among('la' 'le')
 		(mark_suffix_with_optional_y_consonant)
 	)
 	

 	define mark_ki as (
 		'ki'
 	)
 	

 	define mark_ncA as (
 		check_vowel_harmony
 		among('ca' 'ce')	
 		among('ca' 'ce')
 		(mark_suffix_with_optional_n_consonant)
 	)
 	

 	define mark_yUm as (
 		check_vowel_harmony
 		among ('{i'}m' 'im' 'um' '{u"}m')
 		(mark_suffix_with_optional_y_consonant)
 	)
 	

 	define mark_sUn as (
 		check_vowel_harmony
 		among ('s{i'}n' 'sin' 'sun' 's{u"}n' )
 	)
 	

 	define mark_yUz as (
 		check_vowel_harmony
 		among ('{i'}z' 'iz' 'uz' '{u"}z')
 		(mark_suffix_with_optional_y_consonant)
 	)
 	

 	define mark_sUnUz as (
 		among ('s{i'}n{i'}z' 'siniz' 'sunuz' 's{u"}n{u"}z')
 	)
 	

 	define mark_lAr as (
 		check_vowel_harmony
 		among ('ler' 'lar')
 	)
 	

 	define mark_nUz as (
 		check_vowel_harmony
 		among ('n{i'}z' 'niz' 'nuz' 'n{u"}z')
 	)
 	

 	define mark_DUr as (
 		check_vowel_harmony
 		among ('t{i'}r' 'tir' 'tur' 't{u"}r' 'd{i'}r' 'dir' 'dur' 'd{u"}r')
 	)
 	

 	define mark_cAsInA as (
 		among ('cas{i'}na' 'cesine')
 	)
 	

 	define mark_yDU as (
 		check_vowel_harmony
 		among ('t{i'}m' 'tim' 'tum' 't{u"}m' 'd{i'}m' 'dim' 'dum' 'd{u"}m'
@@ -292,24 +290,24 @@ backwardmode (
 		(mark_suffix_with_optional_y_consonant)
 	)

 	// does not fully obey vowel harmony	
 	// does not fully obey vowel harmony
 	define mark_ysA as (
 		among ('sam' 'san' 'sak' 'sem' 'sen' 'sek' 'sa' 'se')
 		(mark_suffix_with_optional_y_consonant)
 	)
 	

 	define mark_ymUs_ as (
 		check_vowel_harmony
 		among ('m{i'}{s.}' 'mi{s.}' 'mu{s.}' 'm{u"}{s.}')
 		among ('m{i'}{s,}' 'mi{s,}' 'mu{s,}' 'm{u"}{s,}')
 		(mark_suffix_with_optional_y_consonant)
 	)
 	

 	define mark_yken as (
 		'ken' (mark_suffix_with_optional_y_consonant)
 	)
 	

 	define stem_nominal_verb_suffixes as (
 		[	
 		[
 			set continue_stemming_noun_suffixes
 			(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
 			or
@@ -327,7 +325,7 @@ backwardmode (
 			(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
 		]delete
 	)
 	

 	// stems noun suffix chains ending with -ki
 	define stem_suffix_chain_before_ki as (
 		[
@@ -337,7 +335,7 @@ backwardmode (
 					(mark_lAr] delete try(stem_suffix_chain_before_ki))
 					or
 					(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 					

 				))
 				or
 				(mark_nUn] delete try([
@@ -348,7 +346,7 @@ backwardmode (
 					(stem_suffix_chain_before_ki)
 				))
 				or
 				(mark_ndA (	
 				(mark_ndA (
 					(mark_lArI] delete)
 					or
 					((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))
@@ -357,7 +355,7 @@ backwardmode (
 				))
 			)
 	)
 	

 	define stem_noun_suffixes as (
 		([mark_lAr] delete try(stem_suffix_chain_before_ki))
 		or
@@ -373,24 +371,24 @@ backwardmode (
 		or
 		([(mark_ndA or mark_nA)
 			(
 		  		(mark_lArI] delete)
 		  		or
 		  		(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 		  		or
 		  		(stem_suffix_chain_before_ki)
 		  	)
 				(mark_lArI] delete)
 				or
 				(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 				or
 				(stem_suffix_chain_before_ki)
 			)
 		)
 		or
 		([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))
 		or
 		( [mark_DAn] delete try ([
 			(
 		 		(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 		 		or
 		 		(mark_lAr] delete try(stem_suffix_chain_before_ki))
 		 		or
 		 		(stem_suffix_chain_before_ki)
 		 	))
 				(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 				or
 				(mark_lAr] delete try(stem_suffix_chain_before_ki))
 				or
 				(stem_suffix_chain_before_ki)
 			))
 		)
 		or
 		([mark_nUn or mark_ylA] delete
@@ -404,18 +402,18 @@ backwardmode (
 		)
 		or
 		([mark_lArI] delete)
 		or	
 		or
 		(stem_suffix_chain_before_ki)
 		or
 		([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))
 		or
 		([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
 	)
 	
 	define post_process_last_consonants as (	

 	define post_process_last_consonants as (
 		[substring] among (
 			'b' (<- 'p')
 			'c' (<- '{c.}')
 			'c' (<- '{c,}')
 			'd' (<- 't')
 			'{g~}' (<- 'k')
 		)
@@ -424,7 +422,7 @@ backwardmode (
 	// after stemming if the word ends with 'd' or 'g' most probably last U is overstemmed
 	// like in 'kedim' -> 'ked'
 	// Turkish words don't usually end with 'd' or 'g'
 	// some very well known words are ignored (like 'ad' 'soyad'	
 	// some very well known words are ignored (like 'ad' 'soyad'
 	// appends U to stems ending with d or g, decides which vowel to add
 	// based on the last vowel in the stem
 	define append_U_to_stems_ending_with_d_or_g as (
@@ -437,7 +435,10 @@ backwardmode (
 		or
 		(test((goto vowel) '{o"}' or '{u"}') <+ '{u"}')
 	)
 	

 	define is_reserved_word as (
 		'ad' try 'soy' atlimit
 	)
 )

 // Tests if there are more than one syllables
@@ -446,18 +447,12 @@ define more_than_one_syllable_word as (
 	test (atleast 2 (gopast vowel))
 )

 // Reserved-word check: succeeds when 'ad' (resp. 'soyad') occurs in the word and
 // the literal's length, stored in strlen (2, resp. 5), equals limit.
 // Each alternative is wrapped in test, so the cursor is not moved.
 // NOTE(review): presumably this means the word is exactly 'ad' or 'soyad'; strlen
 // is declared outside this excerpt - confirm.
 define is_reserved_word as (
 	test(gopast 'ad' ($strlen = 2) ($strlen == limit))
 	or
 	test(gopast 'soyad' ($strlen = 5) ($strlen == limit))
 )

 // Final clean-up step: unless the word is reserved, switch to backward
 // processing and run append_U_to_stems_ending_with_d_or_g followed by
 // post_process_last_consonants. Both are wrapped in `do`, so a failure of
 // either one does not make postlude fail.
 // NOTE(review): is_reserved_word is tested both before and inside `backwards`
 // in this text - confirm which of the two guards is meant to remain.
 define postlude as (
 	not(is_reserved_word)
 	backwards (
 		not(is_reserved_word)
 		do append_U_to_stems_ending_with_d_or_g
 		do post_process_last_consonants
 		

 	)
 )

@@ -469,9 +464,7 @@ define stem as (
 			continue_stemming_noun_suffixes
 			do stem_noun_suffixes
 		)
 		

 	postlude
 	)
 )


--- a/contrib/snowball/charsets/ISO-8859-2.sbl
+++ b/contrib/snowball/charsets/ISO-8859-2.sbl
@@ -0,0 +1,98 @@
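 // ISO-8859-2 character mappings: each stringdef binds a Unicode code point name (U+XXXX) to its single-byte ISO-8859-2 value, given in hex.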

 stringdef U+00A0  hex 'A0'
 stringdef U+0104  hex 'A1'
 stringdef U+02D8  hex 'A2'
 stringdef U+0141  hex 'A3'
 stringdef U+00A4  hex 'A4'
 stringdef U+013D  hex 'A5'
 stringdef U+015A  hex 'A6'
 stringdef U+00A7  hex 'A7'
 stringdef U+00A8  hex 'A8'
 stringdef U+0160  hex 'A9'
 stringdef U+015E  hex 'AA'
 stringdef U+0164  hex 'AB'
 stringdef U+0179  hex 'AC'
 stringdef U+00AD  hex 'AD'
 stringdef U+017D  hex 'AE'
 stringdef U+017B  hex 'AF'
 stringdef U+00B0  hex 'B0'
 stringdef U+0105  hex 'B1'
 stringdef U+02DB  hex 'B2'
 stringdef U+0142  hex 'B3'
 stringdef U+00B4  hex 'B4'
 stringdef U+013E  hex 'B5'
 stringdef U+015B  hex 'B6'
 stringdef U+02C7  hex 'B7'
 stringdef U+00B8  hex 'B8'
 stringdef U+0161  hex 'B9'
 stringdef U+015F  hex 'BA'
 stringdef U+0165  hex 'BB'
 stringdef U+017A  hex 'BC'
 stringdef U+02DD  hex 'BD'
 stringdef U+017E  hex 'BE'
 stringdef U+017C  hex 'BF'
 stringdef U+0154  hex 'C0'
 stringdef U+00C1  hex 'C1'
 stringdef U+00C2  hex 'C2'
 stringdef U+0102  hex 'C3'
 stringdef U+00C4  hex 'C4'
 stringdef U+0139  hex 'C5'
 stringdef U+0106  hex 'C6'
 stringdef U+00C7  hex 'C7'
 stringdef U+010C  hex 'C8'
 stringdef U+00C9  hex 'C9'
 stringdef U+0118  hex 'CA'
 stringdef U+00CB  hex 'CB'
 stringdef U+011A  hex 'CC'
 stringdef U+00CD  hex 'CD'
 stringdef U+00CE  hex 'CE'
 stringdef U+010E  hex 'CF'
 stringdef U+0110  hex 'D0'
 stringdef U+0143  hex 'D1'
 stringdef U+0147  hex 'D2'
 stringdef U+00D3  hex 'D3'
 stringdef U+00D4  hex 'D4'
 stringdef U+0150  hex 'D5'
 stringdef U+00D6  hex 'D6'
 stringdef U+00D7  hex 'D7'
 stringdef U+0158  hex 'D8'
 stringdef U+016E  hex 'D9'
 stringdef U+00DA  hex 'DA'
 stringdef U+0170  hex 'DB'
 stringdef U+00DC  hex 'DC'
 stringdef U+00DD  hex 'DD'
 stringdef U+0162  hex 'DE'
 stringdef U+00DF  hex 'DF'
 stringdef U+0155  hex 'E0'
 stringdef U+00E1  hex 'E1'
 stringdef U+00E2  hex 'E2'
 stringdef U+0103  hex 'E3'
 stringdef U+00E4  hex 'E4'
 stringdef U+013A  hex 'E5'
 stringdef U+0107  hex 'E6'
 stringdef U+00E7  hex 'E7'
 stringdef U+010D  hex 'E8'
 stringdef U+00E9  hex 'E9'
 stringdef U+0119  hex 'EA'
 stringdef U+00EB  hex 'EB'
 stringdef U+011B  hex 'EC'
 stringdef U+00ED  hex 'ED'
 stringdef U+00EE  hex 'EE'
 stringdef U+010F  hex 'EF'
 stringdef U+0111  hex 'F0'
 stringdef U+0144  hex 'F1'
 stringdef U+0148  hex 'F2'
 stringdef U+00F3  hex 'F3'
 stringdef U+00F4  hex 'F4'
 stringdef U+0151  hex 'F5'
 stringdef U+00F6  hex 'F6'
 stringdef U+00F7  hex 'F7'
 stringdef U+0159  hex 'F8'
 stringdef U+016F  hex 'F9'
 stringdef U+00FA  hex 'FA'
 stringdef U+0171  hex 'FB'
 stringdef U+00FC  hex 'FC'
 stringdef U+00FD  hex 'FD'
 stringdef U+0163  hex 'FE'
 stringdef U+02D9  hex 'FF'
--- a/contrib/snowball/charsets/KOI8-R.sbl
+++ b/contrib/snowball/charsets/KOI8-R.sbl
@@ -0,0 +1,74 @@
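 // KOI8-R character mappings: each stringdef binds a Unicode code point name (U+XXXX) to its single-byte KOI8-R value, given in hex.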

 stringdef U+00A0  hex '9A'
 stringdef U+00A9  hex 'BF'
 stringdef U+00B0  hex '9C'
 stringdef U+00B2  hex '9D'
 stringdef U+00B7  hex '9E'
 stringdef U+00F7  hex '9F'
 stringdef U+0401  hex 'B3'
 stringdef U+0410  hex 'E1'
 stringdef U+0411  hex 'E2'
 stringdef U+0412  hex 'F7'
 stringdef U+0413  hex 'E7'
 stringdef U+0414  hex 'E4'
 stringdef U+0415  hex 'E5'
 stringdef U+0416  hex 'F6'
 stringdef U+0417  hex 'FA'
 stringdef U+0418  hex 'E9'
 stringdef U+0419  hex 'EA'
 stringdef U+041A  hex 'EB'
 stringdef U+041B  hex 'EC'
 stringdef U+041C  hex 'ED'
 stringdef U+041D  hex 'EE'
 stringdef U+041E  hex 'EF'
 stringdef U+041F  hex 'F0'
 stringdef U+0420  hex 'F2'
 stringdef U+0421  hex 'F3'
 stringdef U+0422  hex 'F4'
 stringdef U+0423  hex 'F5'
 stringdef U+0424  hex 'E6'
 stringdef U+0425  hex 'E8'
 stringdef U+0426  hex 'E3'
 stringdef U+0427  hex 'FE'
 stringdef U+0428  hex 'FB'
 stringdef U+0429  hex 'FD'
 stringdef U+042A  hex 'FF'
 stringdef U+042B  hex 'F9'
 stringdef U+042C  hex 'F8'
 stringdef U+042D  hex 'FC'
 stringdef U+042E  hex 'E0'
 stringdef U+042F  hex 'F1'
 stringdef U+0430  hex 'C1'
 stringdef U+0431  hex 'C2'
 stringdef U+0432  hex 'D7'
 stringdef U+0433  hex 'C7'
 stringdef U+0434  hex 'C4'
 stringdef U+0435  hex 'C5'
 stringdef U+0436  hex 'D6'
 stringdef U+0437  hex 'DA'
 stringdef U+0438  hex 'C9'
 stringdef U+0439  hex 'CA'
 stringdef U+043A  hex 'CB'
 stringdef U+043B  hex 'CC'
 stringdef U+043C  hex 'CD'
 stringdef U+043D  hex 'CE'
 stringdef U+043E  hex 'CF'
 stringdef U+043F  hex 'D0'
 stringdef U+0440  hex 'D2'
 stringdef U+0441  hex 'D3'
 stringdef U+0442  hex 'D4'
 stringdef U+0443  hex 'D5'
 stringdef U+0444  hex 'C6'
 stringdef U+0445  hex 'C8'
 stringdef U+0446  hex 'C3'
 stringdef U+0447  hex 'DE'
 stringdef U+0448  hex 'DB'
 stringdef U+0449  hex 'DD'
 stringdef U+044A  hex 'DF'
 stringdef U+044B  hex 'D9'
 stringdef U+044C  hex 'D8'
 stringdef U+044D  hex 'DC'
 stringdef U+044E  hex 'C0'
 stringdef U+044F  hex 'D1'
 stringdef U+0451  hex 'A3'
--- a/contrib/snowball/charsets/cp850.sbl
+++ b/contrib/snowball/charsets/cp850.sbl
@@ -0,0 +1,130 @@
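 // Code page 850 (MS-DOS Latin 1) character mappings: each stringdef binds a Unicode code point name (U+XXXX) to its single-byte CP850 value, given in hex.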

 stringdef U+00A0  hex 'FF'
 stringdef U+00A1  hex 'AD'
 stringdef U+00A2  hex 'BD'
 stringdef U+00A3  hex '9C'
 stringdef U+00A4  hex 'CF'
 stringdef U+00A5  hex 'BE'
 stringdef U+00A6  hex 'DD'
 stringdef U+00A7  hex 'F5'
 stringdef U+00A8  hex 'F9'
 stringdef U+00A9  hex 'B8'
 stringdef U+00AA  hex 'A6'
 stringdef U+00AB  hex 'AE'
 stringdef U+00AC  hex 'AA'
 stringdef U+00AD  hex 'F0'
 stringdef U+00AE  hex 'A9'
 stringdef U+00AF  hex 'EE'
 stringdef U+00B0  hex 'F8'
 stringdef U+00B1  hex 'F1'
 stringdef U+00B2  hex 'FD'
 stringdef U+00B3  hex 'FC'
 stringdef U+00B4  hex 'EF'
 stringdef U+00B5  hex 'E6'
 stringdef U+00B6  hex 'F4'
 stringdef U+00B7  hex 'FA'
 stringdef U+00B8  hex 'F7'
 stringdef U+00B9  hex 'FB'
 stringdef U+00BA  hex 'A7'
 stringdef U+00BB  hex 'AF'
 stringdef U+00BC  hex 'AC'
 stringdef U+00BD  hex 'AB'
 stringdef U+00BE  hex 'F3'
 stringdef U+00BF  hex 'A8'
 stringdef U+00C0  hex 'B7'
 stringdef U+00C1  hex 'B5'
 stringdef U+00C2  hex 'B6'
 stringdef U+00C3  hex 'C7'
 stringdef U+00C4  hex '8E'
 stringdef U+00C5  hex '8F'
 stringdef U+00C6  hex '92'
 stringdef U+00C7  hex '80'
 stringdef U+00C8  hex 'D4'
 stringdef U+00C9  hex '90'
 stringdef U+00CA  hex 'D2'
 stringdef U+00CB  hex 'D3'
 stringdef U+00CC  hex 'DE'
 stringdef U+00CD  hex 'D6'
 stringdef U+00CE  hex 'D7'
 stringdef U+00CF  hex 'D8'
 stringdef U+00D0  hex 'D1'
 stringdef U+00D1  hex 'A5'
 stringdef U+00D2  hex 'E3'
 stringdef U+00D3  hex 'E0'
 stringdef U+00D4  hex 'E2'
 stringdef U+00D5  hex 'E5'
 stringdef U+00D6  hex '99'
 stringdef U+00D7  hex '9E'
 stringdef U+00D8  hex '9D'
 stringdef U+00D9  hex 'EB'
 stringdef U+00DA  hex 'E9'
 stringdef U+00DB  hex 'EA'
 stringdef U+00DC  hex '9A'
 stringdef U+00DD  hex 'ED'
 stringdef U+00DE  hex 'E8'
 stringdef U+00DF  hex 'E1'
 stringdef U+00E0  hex '85'
 stringdef U+00E1  hex 'A0'
 stringdef U+00E2  hex '83'
 stringdef U+00E3  hex 'C6'
 stringdef U+00E4  hex '84'
 stringdef U+00E5  hex '86'
 stringdef U+00E6  hex '91'
 stringdef U+00E7  hex '87'
 stringdef U+00E8  hex '8A'
 stringdef U+00E9  hex '82'
 stringdef U+00EA  hex '88'
 stringdef U+00EB  hex '89'
 stringdef U+00EC  hex '8D'
 stringdef U+00ED  hex 'A1'
 stringdef U+00EE  hex '8C'
 stringdef U+00EF  hex '8B'
 stringdef U+00F0  hex 'D0'
 stringdef U+00F1  hex 'A4'
 stringdef U+00F2  hex '95'
 stringdef U+00F3  hex 'A2'
 stringdef U+00F4  hex '93'
 stringdef U+00F5  hex 'E4'
 stringdef U+00F6  hex '94'
 stringdef U+00F7  hex 'F6'
 stringdef U+00F8  hex '9B'
 stringdef U+00F9  hex '97'
 stringdef U+00FA  hex 'A3'
 stringdef U+00FB  hex '96'
 stringdef U+00FC  hex '81'
 stringdef U+00FD  hex 'EC'
 stringdef U+00FE  hex 'E7'
 stringdef U+00FF  hex '98'
 stringdef U+0131  hex 'D5'
 stringdef U+0192  hex '9F'
 stringdef U+2017  hex 'F2'
 stringdef U+2500  hex 'C4'
 stringdef U+2502  hex 'B3'
 stringdef U+250C  hex 'DA'
 stringdef U+2510  hex 'BF'
 stringdef U+2514  hex 'C0'
 stringdef U+2518  hex 'D9'
 stringdef U+251C  hex 'C3'
 stringdef U+2524  hex 'B4'
 stringdef U+252C  hex 'C2'
 stringdef U+2534  hex 'C1'
 stringdef U+253C  hex 'C5'
 stringdef U+2550  hex 'CD'
 stringdef U+2551  hex 'BA'
 stringdef U+2554  hex 'C9'
 stringdef U+2557  hex 'BB'
 stringdef U+255A  hex 'C8'
 stringdef U+255D  hex 'BC'
 stringdef U+2560  hex 'CC'
 stringdef U+2563  hex 'B9'
 stringdef U+2566  hex 'CB'
 stringdef U+2569  hex 'CA'
 stringdef U+256C  hex 'CE'
 stringdef U+2580  hex 'DF'
 stringdef U+2584  hex 'DC'
 stringdef U+2588  hex 'DB'
 stringdef U+2591  hex 'B0'
 stringdef U+2592  hex 'B1'
 stringdef U+2593  hex 'B2'
 stringdef U+25A0  hex 'FE'
--- a/contrib/snowball/compiler/analyser.c
+++ b/contrib/snowball/compiler/analyser.c
--- a/contrib/snowball/compiler/driver.c
+++ b/contrib/snowball/compiler/driver.c
@@ -1,48 +1,86 @@
 #include <ctype.h>   /* for toupper etc */
 #include <stdio.h>   /* for fprintf etc */
 #include <stdlib.h>  /* for free etc */
 #include <string.h>  /* for strlen */
 #include <string.h>  /* for strcmp */
 #include "header.h"

 #define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
 #define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
 #define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
 #define DEFAULT_STRING_CLASS "java.lang.StringBuilder"
 #define DEFAULT_JAVA_PACKAGE "org.tartarus.snowball.ext"
 #define DEFAULT_JAVA_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
 #define DEFAULT_JAVA_AMONG_CLASS "org.tartarus.snowball.Among"
 #define DEFAULT_JAVA_STRING_CLASS "java.lang.StringBuilder"

 #define DEFAULT_GO_PACKAGE "snowball"
 #define DEFAULT_GO_SNOWBALL_RUNTIME "github.com/snowballstem/snowball/go"

 #define DEFAULT_CS_NAMESPACE "Snowball"
 #define DEFAULT_CS_BASE_CLASS "Stemmer"
 #define DEFAULT_CS_AMONG_CLASS "Among"
 #define DEFAULT_CS_STRING_CLASS "StringBuilder"

 #define DEFAULT_JS_BASE_CLASS "BaseStemmer"

 #define DEFAULT_PYTHON_BASE_CLASS "BaseStemmer"

 /* Return non-zero iff the NUL-terminated strings s1 and s2 are identical.
  *
  * A single strcmp() replaces the earlier length-compare + memcmp() pair: the
  * result is the same, but no size_t length is narrowed to int and there is
  * no unreachable second return statement.
  */
 static int eq(const char * s1, const char * s2) {
    return strcmp(s1, s2) == 0;
 }

 /* Print the command-line usage summary and terminate the process.
  *
  * NOTE(review): this span carries two revisions of the function interleaved:
  * a `void` form that always writes to stderr and ends with exit(1), and an
  * `int exit_code` form that writes to stdout when exit_code is 0 (stderr
  * otherwise) and ends with exit(exit_code). It does not parse as a single
  * function as it stands; confirm which revision is meant to remain.
  *
  * Option lines wrapped in `#ifndef DISABLE_*` are only part of the message
  * when the corresponding macro is not defined.
  */
 static void print_arglist(void) {
    fprintf(stderr, "Usage: snowball <file> [options]\n\n"
                    "options are: [-o[utput] file]\n"
                    "             [-s[yntax]]\n"
 static void print_arglist(int exit_code) {
    FILE * f = exit_code ? stderr : stdout;
    fprintf(f, "Usage: snowball SOURCE_FILE... [OPTIONS]\n\n"
               "Supported options:\n"
               "  -o[utput] file\n"
               "  -s[yntax]\n"
               "  -comments\n"
 #ifndef DISABLE_JAVA
                    "             [-j[ava]]\n"
               "  -j[ava]\n"
 #endif
                    "             [-c++]\n"
                    "             [-w[idechars]]\n"
                    "             [-u[tf8]]\n"
                    "             [-n[ame] class name]\n"
                    "             [-ep[refix] string]\n"
                    "             [-vp[refix] string]\n"
                    "             [-i[nclude] directory]\n"
                    "             [-r[untime] path to runtime headers]\n"
 #ifndef DISABLE_JAVA
                    "             [-p[arentclassname] fully qualified parent class name]\n"
                    "             [-P[ackage] package name for stemmers]\n"
                    "             [-S[tringclass] StringBuffer-compatible class]\n"
                    "             [-a[mongclass] fully qualified name of the Among class]\n"
 #ifndef DISABLE_CSHARP
               "  -cs[harp]\n"
 #endif
               "  -c++\n"
 #ifndef DISABLE_PASCAL
               "  -pascal\n"
 #endif
 #ifndef DISABLE_PYTHON
               "  -py[thon]\n"
 #endif
 #ifndef DISABLE_JS
               "  -js\n"
 #endif
 #ifndef DISABLE_RUST
               "  -rust\n"
 #endif
 #ifndef DISABLE_GO
               "  -go\n"
 #endif
               "  -w[idechars]\n"
               "  -u[tf8]\n"
               "  -n[ame] class name\n"
               "  -ep[refix] string\n"
               "  -vp[refix] string\n"
               "  -i[nclude] directory\n"
               "  -r[untime] path to runtime headers\n"
               "  -p[arentclassname] fully qualified parent class name\n"
 #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
               "  -P[ackage] package name for stemmers\n"
               "  -S[tringclass] StringBuffer-compatible class\n"
               "  -a[mongclass] fully qualified name of the Among class\n"
 #endif
 #ifndef DISABLE_GO
               "  -gop[ackage] Go package name for stemmers\n"
               "  -gor[untime] Go snowball runtime package\n"
 #endif
               "  --help        display this help and exit\n"
               "  --version     output version information and exit\n"
           );
    exit(1);
    exit(exit_code);
 }

 /* Guard for options that take a value: if index i is already past the end
  * of the argument vector (i >= argc), report "argument list is one short"
  * on stderr and hand over to print_arglist.
  *
  * NOTE(review): both print_arglist() and print_arglist(1) are called here;
  * these look like two revisions of the same call - confirm which one is
  * meant to remain.
  */
 static void check_lim(int i, int argc) {
    if (i >= argc) {
        fprintf(stderr, "argument list is one short\n");
        print_arglist();
        print_arglist(1);
    }
 }

@@ -57,35 +95,47 @@ static FILE * get_output(symbol * b) {
    return output;
 }

 static void read_options(struct options * o, int argc, char * argv[]) {
 static int read_options(struct options * o, int argc, char * argv[]) {
    char * s;
    int i = 2;
    int i = 1;
    int new_argc = 1;
    /* Note down the last option used to specify an explicit encoding so
     * we can warn we ignored it for languages with a fixed encoding.
     */
    const char * encoding_opt = NULL;

    /* set defaults: */

    o->output_file = 0;
    o->syntax_tree = false;
    o->externals_prefix = "";
    o->comments = false;
    o->externals_prefix = NULL;
    o->variables_prefix = 0;
    o->runtime_path = 0;
    o->parent_class_name = DEFAULT_BASE_CLASS;
    o->string_class = DEFAULT_STRING_CLASS;
    o->among_class = DEFAULT_AMONG_CLASS;
    o->package = DEFAULT_PACKAGE;
    o->name = "";
    o->parent_class_name = NULL;
    o->string_class = NULL;
    o->among_class = NULL;
    o->package = NULL;
    o->go_snowball_runtime = DEFAULT_GO_SNOWBALL_RUNTIME;
    o->name = NULL;
    o->make_lang = LANG_C;
    o->widechars = false;
    o->includes = 0;
    o->includes_end = 0;
    o->utf8 = false;
    o->encoding = ENC_SINGLEBYTE;

    /* read options: */

    repeat {
        if (i >= argc) break;
    while (i < argc) {
        s = argv[i++];
        {   if (eq(s, "-o") || eq(s, "-output")) {
                check_lim(i, argc);
        if (s[0] != '-') {
            /* Non-option argument - shuffle down. */
            argv[new_argc++] = s;
            continue;
        }

        {
            if (eq(s, "-o") || eq(s, "-output")) {
               check_lim(i, argc);
                o->output_file = argv[i++];
                continue;
            }
@@ -94,10 +144,33 @@ static void read_options(struct options * o, int argc, char * argv[]) {
                o->name = argv[i++];
                continue;
            }
 #ifndef DISABLE_JS
            if (eq(s, "-js")) {
                o->make_lang = LANG_JAVASCRIPT;
                continue;
            }
 #endif
 #ifndef DISABLE_RUST
            if (eq(s, "-rust")) {
                o->make_lang = LANG_RUST;
                continue;
            }
 #endif
 #ifndef DISABLE_GO
            if (eq(s, "-go")) {
                o->make_lang = LANG_GO;
                continue;
            }
 #endif
 #ifndef DISABLE_JAVA
            if (eq(s, "-j") || eq(s, "-java")) {
                o->make_lang = LANG_JAVA;
                o->widechars = true;
                continue;
            }
 #endif
 #ifndef DISABLE_CSHARP
            if (eq(s, "-cs") || eq(s, "-csharp")) {
                o->make_lang = LANG_CSHARP;
                continue;
            }
 #endif
@@ -105,15 +178,31 @@ static void read_options(struct options * o, int argc, char * argv[]) {
                o->make_lang = LANG_CPLUSPLUS;
                continue;
            }
 #ifndef DISABLE_PASCAL
            if (eq(s, "-pascal")) {
                o->make_lang = LANG_PASCAL;
                continue;
            }
 #endif
 #ifndef DISABLE_PYTHON
            if (eq(s, "-py") || eq(s, "-python")) {
                o->make_lang = LANG_PYTHON;
                continue;
            }
 #endif
            if (eq(s, "-w") || eq(s, "-widechars")) {
                o->widechars = true;
                o->utf8 = false;
                encoding_opt = s;
                o->encoding = ENC_WIDECHARS;
                continue;
            }
            if (eq(s, "-s") || eq(s, "-syntax")) {
                o->syntax_tree = true;
                continue;
            }
            if (eq(s, "-comments")) {
                o->comments = true;
                continue;
            }
            if (eq(s, "-ep") || eq(s, "-eprefix")) {
                check_lim(i, argc);
                o->externals_prefix = argv[i++];
@@ -145,16 +234,16 @@ static void read_options(struct options * o, int argc, char * argv[]) {
                continue;
            }
            if (eq(s, "-u") || eq(s, "-utf8")) {
                o->utf8 = true;
                o->widechars = false;
                encoding_opt = s;
                o->encoding = ENC_UTF8;
                continue;
            }
 #ifndef DISABLE_JAVA
            if (eq(s, "-p") || eq(s, "-parentclassname")) {
                check_lim(i, argc);
                o->parent_class_name = argv[i++];
                continue;
            }
 #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
            if (eq(s, "-P") || eq(s, "-Package")) {
                check_lim(i, argc);
                o->package = argv[i++];
@@ -171,44 +260,216 @@ static void read_options(struct options * o, int argc, char * argv[]) {
                continue;
            }
 #endif
 #ifndef DISABLE_GO
            if (eq(s, "-gop") || eq(s, "-gopackage")) {
                check_lim(i, argc);
                o->package = argv[i++];
                continue;
            }
            if (eq(s, "-gor") || eq(s, "-goruntime")) {
                check_lim(i, argc);
                o->go_snowball_runtime = argv[i++];
                continue;
            }
 #endif
            if (eq(s, "--help")) {
                print_arglist(0);
            }

            if (eq(s, "--version")) {
                printf("Snowball compiler version " SNOWBALL_VERSION "\n");
                exit(0);
            }

            fprintf(stderr, "'%s' misplaced\n", s);
            print_arglist();
            print_arglist(1);
        }
    }
    if (new_argc == 1) {
        fprintf(stderr, "no source files specified\n");
        print_arglist(1);
    }
    argv[new_argc] = NULL;

    /* Set language-dependent defaults. */
    switch (o->make_lang) {
        case LANG_C:
        case LANG_CPLUSPLUS:
            encoding_opt = NULL;
            break;
        case LANG_CSHARP:
            o->encoding = ENC_WIDECHARS;
            if (!o->parent_class_name)
                o->parent_class_name = DEFAULT_CS_BASE_CLASS;
            if (!o->string_class)
                o->string_class = DEFAULT_CS_STRING_CLASS;
            if (!o->among_class)
                o->among_class = DEFAULT_CS_AMONG_CLASS;
            if (!o->package)
                o->package = DEFAULT_CS_NAMESPACE;
            break;
        case LANG_GO:
            o->encoding = ENC_UTF8;
            if (!o->package)
                o->package = DEFAULT_GO_PACKAGE;
            break;
        case LANG_JAVA:
            o->encoding = ENC_WIDECHARS;
            if (!o->parent_class_name)
                o->parent_class_name = DEFAULT_JAVA_BASE_CLASS;
            if (!o->string_class)
                o->string_class = DEFAULT_JAVA_STRING_CLASS;
            if (!o->among_class)
                o->among_class = DEFAULT_JAVA_AMONG_CLASS;
            if (!o->package)
                o->package = DEFAULT_JAVA_PACKAGE;
            break;
        case LANG_JAVASCRIPT:
            o->encoding = ENC_WIDECHARS;
            if (!o->parent_class_name)
                o->parent_class_name = DEFAULT_JS_BASE_CLASS;
            break;
        case LANG_PYTHON:
            o->encoding = ENC_WIDECHARS;
            if (!o->parent_class_name)
                o->parent_class_name = DEFAULT_PYTHON_BASE_CLASS;
            break;
        case LANG_RUST:
            o->encoding = ENC_UTF8;
            break;
        default:
            break;
    }

    if (encoding_opt) {
        fprintf(stderr, "warning: %s only meaningful for C and C++\n",
                encoding_opt);
    }

    if (o->make_lang != LANG_C && o->make_lang != LANG_CPLUSPLUS) {
        if (o->runtime_path) {
            fprintf(stderr, "warning: -r/-runtime only meaningful for C and C++\n");
        }
        if (o->externals_prefix) {
            fprintf(stderr, "warning: -ep/-eprefix only meaningful for C and C++\n");
        }
    }
    if (!o->externals_prefix) o->externals_prefix = "";

    if (!o->name && o->output_file) {
        /* Default class name to basename of output_file - this is the standard
         * convention for at least Java and C#.
         */
        const char * slash = strrchr(o->output_file, '/');
        size_t len;
        const char * leaf = (slash == NULL) ? o->output_file : slash + 1;

        slash = strrchr(leaf, '\\');
        if (slash != NULL) leaf = slash + 1;

        {
            const char * dot = strchr(leaf, '.');
            len = (dot == NULL) ? strlen(leaf) : (size_t)(dot - leaf);
        }

        {
            char * new_name = malloc(len + 1);
            switch (o->make_lang) {
                case LANG_CSHARP:
                case LANG_PASCAL:
                    /* Upper case initial letter. */
                    memcpy(new_name, leaf, len);
                    new_name[0] = toupper(new_name[0]);
                    break;
                case LANG_JAVASCRIPT:
                case LANG_PYTHON: {
                    /* Upper case initial letter and change each
                     * underscore+letter or hyphen+letter to an upper case
                     * letter.
                     */
                    size_t i, j = 0;
                    int uc_next = true;
                    for (i = 0; i != len; ++i) {
                        unsigned char ch = leaf[i];
                        if (ch == '_' || ch == '-') {
                            uc_next = true;
                        } else {
                            if (uc_next) {
                                new_name[j] = toupper(ch);
                                uc_next = false;
                            } else {
                                new_name[j] = ch;
                            }
                            ++j;
                        }
                    }
                    len = j;
                    break;
                }
                default:
                    /* Just copy. */
                    memcpy(new_name, leaf, len);
                    break;
            }
            new_name[len] = '\0';
            o->name = new_name;
        }
    }

    return new_argc;
 }

 extern int main(int argc, char * argv[]) {

    int i;
    NEW(options, o);
    if (argc == 1) print_arglist();
    read_options(o, argc, argv);
    argc = read_options(o, argc, argv);
    {
        symbol * filename = add_s_to_b(0, argv[1]);
        char * file;
        symbol * u = get_input(filename, &file);
        char * file = argv[1];
        symbol * u = get_input(file);
        if (u == 0) {
            fprintf(stderr, "Can't open input %s\n", argv[1]);
            fprintf(stderr, "Can't open input %s\n", file);
            exit(1);
        }
        {
            struct tokeniser * t = create_tokeniser(u, file);
            struct analyser * a = create_analyser(t);
            t->widechars = o->widechars;
            struct input ** next_input_ptr = &(t->next);
            a->encoding = t->encoding = o->encoding;
            t->includes = o->includes;
            a->utf8 = t->utf8 = o->utf8;
            /* If multiple source files are specified, set up the others to be
             * read after the first in order, using the same mechanism as
             * 'get' uses. */
            for (i = 2; i != argc; ++i) {
                NEW(input, q);
                file = argv[i];
                u = get_input(file);
                if (u == 0) {
                    fprintf(stderr, "Can't open input %s\n", file);
                    exit(1);
                }
                q->p = u;
                q->c = 0;
                q->file = file;
                q->file_needs_freeing = false;
                q->line_number = 1;
                *next_input_ptr = q;
                next_input_ptr = &(q->next);
            }
            *next_input_ptr = NULL;
            read_program(a);
            if (t->error_count > 0) exit(1);
            if (o->syntax_tree) print_program(a);
            close_tokeniser(t);
            unless (o->syntax_tree) {
            if (!o->syntax_tree) {
                struct generator * g;

                char * s = o->output_file;
                unless (s) {
                const char * s = o->output_file;
                if (!s) {
                    fprintf(stderr, "Please include the -o option\n");
                    print_arglist();
                    exit(1);
                    print_arglist(1);
                }
                g = create_generator(a, o);
                if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".h");
@@ -217,41 +478,96 @@ extern int main(int argc, char * argv[]) {
                    if (o->make_lang == LANG_CPLUSPLUS) {
                        b = add_s_to_b(b, "c");
                    }
                    o->output_c = get_output(b);
                    o->output_src = get_output(b);
                    lose_b(b);

                    g = create_generator_c(a, o);
                    generate_program_c(g);
                    close_generator_c(g);
                    fclose(o->output_c);
                    fclose(o->output_src);
                    fclose(o->output_h);
                }
 #ifndef DISABLE_JAVA
                if (o->make_lang == LANG_JAVA) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".java");
                    o->output_java = get_output(b);
                    o->output_src = get_output(b);
                    lose_b(b);
                    g = create_generator_java(a, o);
                    generate_program_java(g);
                    close_generator_java(g);
                    fclose(o->output_java);
                    fclose(o->output_src);
                }
 #endif
 #ifndef DISABLE_PASCAL
                if (o->make_lang == LANG_PASCAL) {
                    symbol *b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".pas");
                    o->output_src = get_output(b);
                    lose_b(b);
                    generate_program_pascal(g);
                    fclose(o->output_src);
                }
 #endif
 #ifndef DISABLE_PYTHON
                if (o->make_lang == LANG_PYTHON) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".py");
                    o->output_src = get_output(b);
                    lose_b(b);
                    generate_program_python(g);
                    fclose(o->output_src);
                }
 #endif
 #ifndef DISABLE_JS
                if (o->make_lang == LANG_JAVASCRIPT) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".js");
                    o->output_src = get_output(b);
                    lose_b(b);
                    generate_program_js(g);
                    fclose(o->output_src);
                }
 #endif
 #ifndef DISABLE_CSHARP
                if (o->make_lang == LANG_CSHARP) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".cs");
                    o->output_src = get_output(b);
                    lose_b(b);
                    generate_program_csharp(g);
                    fclose(o->output_src);
                }
 #endif
 #ifndef DISABLE_RUST
                if (o->make_lang == LANG_RUST) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".rs");
                    o->output_src = get_output(b);
                    lose_b(b);
                    generate_program_rust(g);
                    fclose(o->output_src);
                }
 #endif
 #ifndef DISABLE_GO
                if (o->make_lang == LANG_GO) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".go");
                    o->output_src = get_output(b);
                    lose_b(b);
                    generate_program_go(g);
                    fclose(o->output_src);
                }
 #endif
                close_generator(g);
            }
            close_analyser(a);
        }
        lose_b(u);
        lose_b(filename);
    }
    {   struct include * p = o->includes;
        until (p == 0)
        {   struct include * q = p->next;
        while (p) {
            struct include * q = p->next;
            lose_b(p->b); FREE(p); p = q;
        }
    }
    FREE(o);
    unless (space_count == 0) fprintf(stderr, "%d blocks unfreed\n", space_count);
    if (space_count) fprintf(stderr, "%d blocks unfreed\n", space_count);
    return 0;
 }

--- a/contrib/snowball/compiler/generator.c
+++ b/contrib/snowball/compiler/generator.c
--- a/contrib/snowball/compiler/generator_java.c
+++ b/contrib/snowball/compiler/generator_java.c
--- a/contrib/snowball/compiler/header.h
+++ b/contrib/snowball/compiler/header.h
@@ -1,49 +1,56 @@
 #include <stdio.h>

 #define SNOWBALL_VERSION "2.0.0"

 typedef unsigned char byte;
 typedef unsigned short symbol;

 #define true 1
 #define false 0
 #define repeat while(true)
 #define unless(C) if(!(C))
 #define until(C) while(!(C))

 #define MALLOC check_malloc
 #define FREE check_free

 #define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
 /* Declare `p` as a pointer to a freshly MALLOC'd array of n `struct type`
  * elements. The count is parenthesised so that an expression argument such
  * as `a + b` scales as a whole instead of only its first operand, and the
  * macro is defined exactly once.
  */
 #define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))

 #define STARTSIZE   10
 #define SIZE(p)     ((int *)(p))[-1]
 #define CAPACITY(p) ((int *)(p))[-2]

 extern symbol * create_b(int n);
 extern void report_b(FILE * out, symbol * p);
 extern void report_b(FILE * out, const symbol * p);
 extern void lose_b(symbol * p);
 extern symbol * increase_capacity(symbol * p, int n);
 extern symbol * move_to_b(symbol * p, int n, symbol * q);
 extern symbol * add_to_b(symbol * p, int n, symbol * q);
 extern symbol * copy_b(symbol * p);
 extern char * b_to_s(symbol * p);
 extern symbol * move_to_b(symbol * p, int n, const symbol * q);
 extern symbol * add_to_b(symbol * p, int n, const symbol * q);
 extern symbol * copy_b(const symbol * p);
 extern char * b_to_s(const symbol * p);
 extern symbol * add_s_to_b(symbol * p, const char * s);

 #define MOVE_TO_B(B, LIT) \
    move_to_b(B, sizeof(LIT) / sizeof(LIT[0]), LIT)

 struct str; /* defined in space.c */

 extern struct str * str_new(void);
 extern void str_delete(struct str * str);
 extern void str_append(struct str * str, struct str * add);
 extern void str_append(struct str * str, const struct str * add);
 extern void str_append_ch(struct str * str, char add);
 extern void str_append_b(struct str * str, symbol * q);
 extern void str_append_b(struct str * str, const symbol * q);
 extern void str_append_b_tail(struct str * str, const symbol * q, int skip);
 extern void str_append_string(struct str * str, const char * s);
 extern void str_append_int(struct str * str, int i);
 extern void str_clear(struct str * str);
 extern void str_assign(struct str * str, char * s);
 extern struct str * str_copy(struct str * old);
 extern symbol * str_data(struct str * str);
 extern int str_len(struct str * str);
 extern void str_assign(struct str * str, const char * s);
 extern struct str * str_copy(const struct str * old);
 extern symbol * str_data(const struct str * str);
 extern int str_len(const struct str * str);
 extern int str_back(const struct str *str);
 extern int get_utf8(const symbol * p, int * slot);
 extern int put_utf8(int ch, symbol * p);
 extern void output_str(FILE * outfile, struct str * str);

 typedef enum { ENC_SINGLEBYTE, ENC_UTF8, ENC_WIDECHARS } enc;

 struct m_pair {

@@ -60,6 +67,7 @@ struct input {
    symbol * p;
    int c;
    char * file;
    int file_needs_freeing;
    int line_number;

 };
@@ -71,6 +79,28 @@ struct include {

 };

 /* Integer codes identifying tokens.  The codes for the system words are
  * pulled in from syswords2.h; the remaining codes are listed here. */
 enum token_codes {

 #include "syswords2.h"

    c_mathassign,
    c_name,
    c_number,
    c_literalstring,
    c_neg,
    c_call,
    c_grouping,
    c_booltest,

    /* Not a token itself: the count of the codes above.  It is used as the
     * size of the token_disabled array in struct tokeniser. */
    NUM_TOKEN_CODES
 };

 /* Mode in which U+ has been used so far; struct tokeniser keeps one of
  * these values in its uplusmode field. */
 enum uplus_modes {
    UPLUS_NONE,     /* U+ not used yet */
    UPLUS_DEFINED,  /* used as: stringdef U+xxxx .... */
    UPLUS_UNICODE   /* {U+xxxx} used with implicit meaning */
 };

 /* struct input must be a prefix of struct tokeniser. */
 struct tokeniser {

@@ -78,6 +108,7 @@ struct tokeniser {
    symbol * p;
    int c;
    char * file;
    int file_needs_freeing;
    int line_number;
    symbol * b;
    symbol * b2;
@@ -90,34 +121,28 @@ struct tokeniser {
    int token;
    int previous_token;
    byte token_held;
    byte widechars;
    byte utf8;
    enc encoding;

    int omission;
    struct include * includes;

    /* Mode in which U+ has been used:
     * UPLUS_NONE - not used yet
     * UPLUS_DEFINED - stringdef U+xxxx ....
     * UPLUS_UNICODE - {U+xxxx} used with implicit meaning
     */
    int uplusmode;

    char token_disabled[NUM_TOKEN_CODES];
 };

 extern symbol * get_input(symbol * p, char ** p_file);
 extern symbol * get_input(const char * filename);
 extern struct tokeniser * create_tokeniser(symbol * b, char * file);
 extern int read_token(struct tokeniser * t);
 extern const char * name_of_token(int code);
 extern void disable_token(struct tokeniser * t, int code);
 extern void close_tokeniser(struct tokeniser * t);

 enum token_codes {

 #include "syswords2.h"

    c_mathassign,
    c_name,
    c_number,
    c_literalstring,
    c_neg,
    c_call,
    c_grouping,
    c_booltest
 };

 extern int space_count;
 extern void * check_malloc(int n);
 extern void check_free(void * p);
@@ -134,7 +159,13 @@ struct name {
    int count;                  /* 0, 1, 2 for each type */
    struct grouping * grouping; /* for grouping names */
    byte referenced;
    byte used;
    byte used_in_among;         /* Function used in among? */
    byte value_used;            /* (For variables) is its value ever used? */
    byte initialised;           /* (For variables) is it ever initialised? */
    byte used_in_definition;    /* (grouping) used in grouping definition? */
    struct node * used;         /* First use, or NULL if not used */
    struct name * local_to;     /* Local to one routine/external */
    int declaration_line_number;/* Line number of declaration */

 };

@@ -149,9 +180,10 @@ struct amongvec {

    symbol * b;      /* the string giving the case */
    int size;        /* - and its size */
    struct node * p; /* the corresponding command */
    struct node * action; /* the corresponding action */
    int i;           /* the amongvec index of the longest substring of b */
    int result;      /* the numeric result for the case */
    int line_number; /* for diagnostics and stable sorting */
    struct name * function;

 };
@@ -163,19 +195,22 @@ struct among {
    int number;               /* amongs are numbered 0, 1, 2 ... */
    int literalstring_count;  /* in this among */
    int command_count;        /* in this among */
    int nocommand_count;      /* number of "no command" entries in this among */
    int function_count;       /* in this among */
    int amongvar_needed;      /* do we need to set among_var? */
    struct node * starter;    /* i.e. among( (starter) 'string' ... ) */
    struct node * substring;  /* i.e. substring ... among ( ... ) */
    struct node ** commands;  /* array with command_count entries */
 };

 /* Describes one grouping: its characters, the range of their codes, and the
  * name bound to it.  Instances are chained together through `next`. */
 struct grouping {

    struct grouping * next;
    int number;               /* groupings are numbered 0, 1, 2 ... */
    symbol * b;               /* the characters of this group */
    int largest_ch;           /* character with max code */
    int smallest_ch;          /* character with min code */
    byte no_gaps;             /* not used in generator.c after 11/5/05 */
    struct name * name;       /* so g->name->grouping == g */
    int line_number;
 };

 struct node {
@@ -234,7 +269,8 @@ struct analyser {
    struct grouping * groupings;
    struct grouping * groupings_end;
    struct node * substring;  /* pending 'substring' in current routine definition */
    byte utf8;
    enc encoding;
    byte int_limits_used;     /* are maxint or minint used? */
 };

 enum analyser_modes {
@@ -259,16 +295,23 @@ struct generator {
    struct str * outbuf;       /* temporary str to store output */
    struct str * declarations; /* str storing variable declarations */
    int next_label;
 #ifndef DISABLE_PYTHON
    int max_label;
 #endif
    int margin;

    const char * failure_string;     /* String to output in case of a failure. */
 #ifndef DISABLE_JAVA
    struct str * failure_str;  /* This is used by the java generator instead of failure_string */
    /* if > 0, keep_count to restore in case of a failure;
     * if < 0, the negated keep_count for the limit to restore in case of
     * failure. */
    int failure_keep_count;
 #if !defined(DISABLE_JAVA) && !defined(DISABLE_JS) && !defined(DISABLE_PYTHON) && !defined(DISABLE_CSHARP)
    struct str * failure_str;  /* This is used by some generators instead of failure_keep_count */
 #endif

    int label_used;     /* Keep track of whether the failure label is used. */
    int failure_label;
    int debug_count;
    int copy_from_count; /* count of calls to copy_from() */

    const char * S[10];  /* strings */
    symbol * B[10];      /* blocks */
@@ -277,48 +320,92 @@ struct generator {
    symbol * L[5];       /* literals, used in formatted write */

    int line_count;      /* counts number of lines output */
    int line_labelled;   /* in ANSI C, will need extra ';' if it is a block end */
    int line_labelled;   /* in ISO C, will need extra ';' if it is a block end */
    int literalstring_count;
    int keep_count;      /* used to number keep/restore pairs to avoid compiler warnings
                            about shadowed variables */
 };

 /* Special values for failure_label in struct generator. */
 enum special_labels {
    x_return = -1
 };

 struct options {

    /* for the command line: */

    char * output_file;
    char * name;
    FILE * output_c;
    const char * output_file;
    const char * name;
    FILE * output_src;
    FILE * output_h;
 #ifndef DISABLE_JAVA
    FILE * output_java;
 #endif
    byte syntax_tree;
    byte widechars;
    enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS } make_lang;
    char * externals_prefix;
    char * variables_prefix;
    char * runtime_path;
    char * parent_class_name;
    char * package;
    char * string_class;
    char * among_class;
    byte comments;
    enc encoding;
    enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS, LANG_CSHARP, LANG_PASCAL, LANG_PYTHON, LANG_JAVASCRIPT, LANG_RUST, LANG_GO } make_lang;
    const char * externals_prefix;
    const char * variables_prefix;
    const char * runtime_path;
    const char * parent_class_name;
    const char * package;
    const char * go_snowball_runtime;
    const char * string_class;
    const char * among_class;
    struct include * includes;
    struct include * includes_end;
    byte utf8;
 };

 /* Generator for C code. */
 extern struct generator * create_generator_c(struct analyser * a, struct options * o);
 extern void close_generator_c(struct generator * g);
 /* Generator functions common to several backends. */

 extern struct generator * create_generator(struct analyser * a, struct options * o);
 extern void close_generator(struct generator * g);

 extern void write_char(struct generator * g, int ch);
 extern void write_newline(struct generator * g);
 extern void write_string(struct generator * g, const char * s);
 extern void write_int(struct generator * g, int i);
 extern void write_b(struct generator * g, symbol * b);
 extern void write_str(struct generator * g, struct str * str);

 extern void write_comment_content(struct generator * g, struct node * p);
 extern void write_generated_comment_content(struct generator * g);
 extern void write_start_comment(struct generator * g,
                                const char * comment_start,
                                const char * comment_end);

 extern int K_needed(struct generator * g, struct node * p);
 extern int repeat_restore(struct generator * g, struct node * p);

 /* Generator for C code. */
 extern void generate_program_c(struct generator * g);

 #ifndef DISABLE_JAVA
 /* Generator for Java code. */
 extern struct generator * create_generator_java(struct analyser * a, struct options * o);
 extern void close_generator_java(struct generator * g);

 extern void generate_program_java(struct generator * g);
 #endif

 #ifndef DISABLE_CSHARP
 /* Generator for C# code. */
 extern void generate_program_csharp(struct generator * g);
 #endif

 #ifndef DISABLE_PASCAL
 extern void generate_program_pascal(struct generator * g);
 #endif

 #ifndef DISABLE_PYTHON
 /* Generator for Python code. */
 extern void generate_program_python(struct generator * g);
 #endif

 #ifndef DISABLE_JS
 extern void generate_program_js(struct generator * g);
 #endif

 #ifndef DISABLE_RUST
 extern void generate_program_rust(struct generator * g);
 #endif

 #ifndef DISABLE_GO
 extern void generate_program_go(struct generator * g);
 #endif
--- a/contrib/snowball/compiler/space.c
+++ b/contrib/snowball/compiler/space.c
@@ -57,9 +57,19 @@ extern symbol * create_b(int n) {
    return p;
 }

 extern void report_b(FILE * out, symbol * p) {
 extern void report_b(FILE * out, const symbol * p) {
    int i;
    for (i = 0; i < SIZE(p); i++) fprintf(out, "%c", p[i]);
    for (i = 0; i < SIZE(p); i++) {
        if (p[i] > 255) {
            printf("In report_b, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
            exit(1);
        }
        putc(p[i], out);
    }
 }

 extern void output_str(FILE * outfile, struct str * str) {
    report_b(outfile, str_data(str));
 }

 extern void lose_b(symbol * p) {
@@ -74,19 +84,19 @@ extern symbol * increase_capacity(symbol * p, int n) {
    lose_b(p); return q;
 }

 extern symbol * move_to_b(symbol * p, int n, symbol * q) {
 extern symbol * move_to_b(symbol * p, int n, const symbol * q) {
    int x = n - CAPACITY(p);
    if (x > 0) p = increase_capacity(p, x);
    memmove(p, q, n * sizeof(symbol)); SIZE(p) = n; return p;
 }

 extern symbol * add_to_b(symbol * p, int n, symbol * q) {
 extern symbol * add_to_b(symbol * p, int n, const symbol * q) {
    int x = SIZE(p) + n - CAPACITY(p);
    if (x > 0) p = increase_capacity(p, x);
    memmove(p + SIZE(p), q, n * sizeof(symbol)); SIZE(p) += n; return p;
 }

 extern symbol * copy_b(symbol * p) {
 extern symbol * copy_b(const symbol * p) {
    int n = SIZE(p);
    symbol * q = create_b(n);
    move_to_b(q, n, p);
@@ -97,7 +107,7 @@ int space_count = 0;

 extern void * check_malloc(int n) {
    space_count++;
    return calloc(1, n);
    return malloc(n);
 }

 extern void check_free(void * p) {
@@ -107,18 +117,18 @@ extern void check_free(void * p) {

 /* To convert a block to a zero terminated string:  */

 extern char * b_to_s(symbol * p) {
 extern char * b_to_s(const symbol * p) {
    int n = SIZE(p);
    char * s = (char *)calloc(1, n + 1);
    char * s = (char *)malloc(n + 1);
    {
        int i;
        for (i = 0; i < n; i++) {
 	    if (p[i] > 255) {
 		printf("In b_to_s, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
 		exit(1);
 	    }
 	    s[i] = (char)p[i];
 	}
            if (p[i] > 255) {
                printf("In b_to_s, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
                exit(1);
            }
            s[i] = (char)p[i];
        }
    }
    s[n] = 0;
    return s;
@@ -153,9 +163,9 @@ struct str {
 };

 /* Create a new string. */
 extern struct str * str_new() {
 extern struct str * str_new(void) {

    struct str * output = (struct str *) calloc(1, sizeof(struct str));
    struct str * output = (struct str *) malloc(sizeof(struct str));
    output->data = create_b(0);
    return output;
 }
@@ -168,7 +178,7 @@ extern void str_delete(struct str * str) {
 }

 /* Append a str to this str. */
 extern void str_append(struct str * str, struct str * add) {
 extern void str_append(struct str * str, const struct str * add) {

    symbol * q = add->data;
    str->data = add_to_b(str->data, SIZE(q), q);
@@ -183,12 +193,19 @@ extern void str_append_ch(struct str * str, char add) {
 }

 /* Append a low level block to a str. */
 extern void str_append_b(struct str * str, symbol * q) {
 extern void str_append_b(struct str * str, const symbol * q) {

    str->data = add_to_b(str->data, SIZE(q), q);
 }

 /* Append a (char *, null teminated) string to a str. */
 /* Append block q, minus its first `skip` symbols, to str.
  * Nothing is appended when skip is negative or is not smaller than the
  * size of q. */
 extern void str_append_b_tail(struct str * str, const symbol * q, int skip) {
    int total = SIZE(q);
    if (skip >= 0 && skip < total) {
        const symbol * tail = q + skip;
        str->data = add_to_b(str->data, total - skip, tail);
    }
 }

 /* Append a (char *, null terminated) string to a str. */
 extern void str_append_string(struct str * str, const char * s) {

    str->data = add_s_to_b(str->data, s);
@@ -209,14 +226,14 @@ extern void str_clear(struct str * str) {
 }

 /* Set a string */
 extern void str_assign(struct str * str, char * s) {
 extern void str_assign(struct str * str, const char * s) {

    str_clear(str);
    str_append_string(str, s);
 }

 /* Copy a string. */
 extern struct str * str_copy(struct str * old) {
 extern struct str * str_copy(const struct str * old) {

    struct str * newstr = str_new();
    str_append(newstr, old);
@@ -224,17 +241,25 @@ extern struct str * str_copy(struct str * old) {
 }

 /* Get the data stored in this str. */
 extern symbol * str_data(struct str * str) {
 extern symbol * str_data(const struct str * str) {

    return str->data;
 }

 /* Get the length of the str. */
 extern int str_len(struct str * str) {
 extern int str_len(const struct str * str) {

    return SIZE(str->data);
 }

 /* Get the last character of the str.
 *
 * Or -1 if the string is empty.
 */
 extern int str_back(const struct str *str) {
    return SIZE(str->data) ? str->data[SIZE(str->data) - 1] : -1;
 }

 extern int get_utf8(const symbol * p, int * slot) {
    int b0, b1;
    b0 = *p++;
@@ -260,4 +285,3 @@ extern int put_utf8(int ch, symbol * p) {
    p[1] = ((ch >> 6) & 0x3F) | 0x80;
    p[2] = (ch & 0x3F) | 0x80; return 3;
 }

--- a/contrib/snowball/compiler/syswords.h
+++ b/contrib/snowball/compiler/syswords.h
@@ -1,5 +1,5 @@
 static const struct system_word vocab[80+1] = {
  { 0, (const byte *)"", 80+1},
 static const struct system_word vocab[82+1] = {
  { 0, (const byte *)"", 82+1},

  { 1, (const byte *)"$",             c_dollar },
  { 1, (const byte *)"(",             c_bra },
@@ -36,6 +36,7 @@ static const struct system_word vocab[80+1] = {
  { 3, (const byte *)"get",           c_get },
  { 3, (const byte *)"hex",           c_hex },
  { 3, (const byte *)"hop",           c_hop },
  { 3, (const byte *)"len",           c_len },
  { 3, (const byte *)"non",           c_non },
  { 3, (const byte *)"not",           c_not },
  { 3, (const byte *)"set",           c_set },
@@ -49,6 +50,7 @@ static const struct system_word vocab[80+1] = {
  { 4, (const byte *)"true",          c_true },
  { 5, (const byte *)"among",         c_among },
  { 5, (const byte *)"false",         c_false },
  { 5, (const byte *)"lenof",         c_lenof },
  { 5, (const byte *)"limit",         c_limit },
  { 5, (const byte *)"unset",         c_unset },
  { 6, (const byte *)"atmark",        c_atmark },
--- a/contrib/snowball/compiler/syswords2.h
+++ b/contrib/snowball/compiler/syswords2.h
@@ -4,8 +4,8 @@
    c_decimal, c_define, c_delete, c_divide, c_divideassign, c_do,
    c_dollar, c_eq, c_externals, c_fail, c_false, c_for, c_ge, c_get,
    c_gopast, c_goto, c_gr, c_groupings, c_hex, c_hop, c_insert,
    c_integers, c_ket, c_le, c_leftslice, c_limit, c_loop, c_ls,
    c_maxint, c_minint, c_minus, c_minusassign, c_multiply,
    c_integers, c_ket, c_le, c_leftslice, c_len, c_lenof, c_limit, c_loop,
    c_ls, c_maxint, c_minint, c_minus, c_minusassign, c_multiply,
    c_multiplyassign, c_ne, c_next, c_non, c_not, c_or, c_plus,
    c_plusassign, c_repeat, c_reverse, c_rightslice, c_routines,
    c_set, c_setlimit, c_setmark, c_size, c_sizeof, c_slicefrom,
--- a/contrib/snowball/compiler/tokeniser.c
+++ b/contrib/snowball/compiler/tokeniser.c
@@ -16,57 +16,57 @@ struct system_word {

 #include "syswords.h"

 static int smaller(int a, int b) { return a < b ? a : b; }
 #define INITIAL_INPUT_BUFFER_SIZE 8192

 static int hex_to_num(int ch);

 extern symbol * get_input(symbol * p, char ** p_file) {
 static int smaller(int a, int b) { return a < b ? a : b; }

    char * s = b_to_s(p);
 extern symbol * get_input(const char * filename) {
    FILE * input = fopen(filename, "r");
    if (input == 0) { return 0; }
    {
        FILE * input = fopen(s, "r");
        if (input == 0) { free(s); return 0; }
        *p_file = s;
        {
            symbol * u = create_b(STARTSIZE);
            int size = 0;
            repeat
            {   int ch = getc(input);
                if (ch == EOF) break;
                if (size >= CAPACITY(u)) u = increase_capacity(u, size/2);
                u[size++] = ch;
            }
            fclose(input);
            SIZE(u) = size; return u;
        symbol * u = create_b(INITIAL_INPUT_BUFFER_SIZE);
        int size = 0;
        while (true) {
            int ch = getc(input);
            if (ch == EOF) break;
            if (size >= CAPACITY(u)) u = increase_capacity(u, size);
            u[size++] = ch;
        }
        fclose(input);
        SIZE(u) = size;
        return u;
    }
 }

 static void error(struct tokeniser * t, char * s1, int n, symbol * p, char * s2) {
 static void error(struct tokeniser * t, const char * s1, int n, symbol * p, const char * s2) {
    if (t->error_count == 20) { fprintf(stderr, "... etc\n"); exit(1); }
    fprintf(stderr, "%s:%d: ", t->file, t->line_number);
    unless (s1 == 0) fprintf(stderr, "%s", s1);
    unless (p == 0) {
    if (s1) fprintf(stderr, "%s", s1);
    if (p) {
        int i;
        for (i = 0; i < n; i++) fprintf(stderr, "%c", p[i]);
    }
    unless (s2 == 0) fprintf(stderr, "%s", s2);
    if (s2) fprintf(stderr, "%s", s2);
    fprintf(stderr, "\n");
    t->error_count++;
 }

 static void error1(struct tokeniser * t, char * s) {
 static void error1(struct tokeniser * t, const char * s) {
    error(t, s, 0,0, 0);
 }

 static void error2(struct tokeniser * t, char * s) {
 static void error2(struct tokeniser * t, const char * s) {
    error(t, "unexpected end of text after ", 0,0, s);
 }

 static int compare_words(int m, symbol * p, int n, const byte * q) {
    unless (m == n) return m - n;
    if (m != n) return m - n;
    {
        int i; for (i = 0; i < n; i++) {
            int diff = p[i] - q[i];
            unless (diff == 0) return diff;
            if (diff) return diff;
        }
    }
    return 0;
@@ -74,14 +74,13 @@ static int compare_words(int m, symbol * p, int n, const byte * q) {

 static int find_word(int n, symbol * p) {
    int i = 0; int j = vocab->code;
    repeat {
    do {
        int k = i + (j - i)/2;
        const struct system_word * w = vocab + k;
        int diff = compare_words(n, p, w->s_size, w->s);
        if (diff == 0) return w->code;
        if (diff < 0) j = k; else i = k;
        if (j - i == 1) break;
    }
    } while (j - i != 1);
    return -1;
 }

@@ -91,7 +90,7 @@ static int get_number(int n, symbol * p) {
    return x;
 }

 static int eq_s(struct tokeniser * t, char * s) {
 static int eq_s(struct tokeniser * t, const char * s) {
    int l = strlen(s);
    if (SIZE(t->p) - t->c < l) return false;
    {
@@ -103,64 +102,141 @@ static int eq_s(struct tokeniser * t, char * s) {

 static int white_space(struct tokeniser * t, int ch) {
    switch (ch) {
        case '\n': t->line_number++;
        case '\n':
            t->line_number++;
            /* fall through */
        case '\r':
        case '\t':
        case ' ': return true;
        case ' ':
            return true;
    }
    return false;
 }

 static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
    struct m_pair * q = t->m_pairs;
    repeat {
        if (q == 0) return 0;
        {
            symbol * name = q->name;
            if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) return q->value;
        }
        q = q->next;
    struct m_pair * q;
    for (q = t->m_pairs; q; q = q->next) {
        symbol * name = q->name;
        if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) return q->value;
    }
    return 0;
 }

 static int read_literal_string(struct tokeniser * t, int c) {
    symbol * p = t->p;
    int ch;
    SIZE(t->b) = 0;
    repeat {
    while (true) {
        if (c >= SIZE(p)) { error2(t, "'"); return c; }
        ch = p[c];
        if (ch == '\n') { error1(t, "string not terminated"); return c; }
        c++;
        if (ch == t->m_start) {
            /* Inside insert characters. */
            int c0 = c;
            int newlines = false; /* no newlines as yet */
            int black_found = false; /* no printing chars as yet */
            repeat {
            while (true) {
                if (c >= SIZE(p)) { error2(t, "'"); return c; }
                ch = p[c]; c++;
                if (ch == t->m_end) break;
                unless (white_space(t, ch)) black_found = true;
                if (!white_space(t, ch)) black_found = true;
                if (ch == '\n') newlines = true;
                if (newlines && black_found) {
                    error1(t, "string not terminated");
                    return c;
                }
            }
            unless (newlines) {
            if (!newlines) {
                int n = c - c0 - 1;    /* macro size */
                int firstch = p[c0];
                symbol * q = find_in_m(t, n, p + c0);
                if (q == 0) {
                    if (n == 1 && (firstch == '\'' || firstch == t->m_start))
                        t->b = add_to_b(t->b, 1, p + c0);
                    else
                    else if (n >= 3 && firstch == 'U' && p[c0 + 1] == '+') {
                        int codepoint = 0;
                        int x;
                        if (t->uplusmode == UPLUS_DEFINED) {
                            /* See if found with xxxx upper-cased. */
                            symbol * uc = create_b(n);
                            int i;
                            for (i = 0; i != n; ++i) {
                                uc[i] = toupper(p[c0 + i]);
                            }
                            q = find_in_m(t, n, uc);
                            lose_b(uc);
                            if (q != 0) {
                                t->b = add_to_b(t->b, SIZE(q), q);
                                continue;
                            }
                            error1(t, "Some U+xxxx stringdefs seen but not this one");
                        } else {
                            t->uplusmode = UPLUS_UNICODE;
                        }
                        for (x = c0 + 2; x != c - 1; ++x) {
                            int hex = hex_to_num(p[x]);
                            if (hex < 0) {
                                error1(t, "Bad hex digit following U+");
                                break;
                            }
                            codepoint = (codepoint << 4) | hex;
                        }
                        if (t->encoding == ENC_UTF8) {
                            if (codepoint < 0 || codepoint > 0x01ffff) {
                                error1(t, "character values exceed 0x01ffff");
                            }
                            /* Ensure there's enough space for a max length
                             * UTF-8 sequence. */
                            if (CAPACITY(t->b) < SIZE(t->b) + 3) {
                                t->b = increase_capacity(t->b, 3);
                            }
                            SIZE(t->b) += put_utf8(codepoint, t->b + SIZE(t->b));
                        } else {
                            symbol sym;
                            if (t->encoding == ENC_SINGLEBYTE) {
                                /* Only ISO-8859-1 is handled this way - for
                                 * other single-byte character sets you need
                                 * to stringdef all the U+xxxx codes you use,
                                 * e.g.:
                                 *
                                 * stringdef U+0171   hex 'FB'
                                 */
                                if (codepoint < 0 || codepoint > 0xff) {
                                    error1(t, "character values exceed 256");
                                }
                            } else {
                                if (codepoint < 0 || codepoint > 0xffff) {
                                    error1(t, "character values exceed 64K");
                                }
                            }
                            sym = codepoint;
                            t->b = add_to_b(t->b, 1, &sym);
                        }
                    } else
                        error(t, "string macro '", n, p + c0, "' undeclared");
                } else
                    t->b = add_to_b(t->b, SIZE(q), q);
            }
        } else {
            if (ch == '\'') return c;
            if (ch < 0 || ch >= 0x80) {
                if (t->encoding != ENC_WIDECHARS) {
                    /* We don't really want people using non-ASCII literal
                     * strings, but historically it's worked for single-byte
                     * and UTF-8 if the source encoding matches what the
                     * generated stemmer works in and it seems unfair to just
                     * suddenly make this a hard error.
                     */
                    fprintf(stderr,
                            "%s:%d: warning: Non-ASCII literal strings aren't "
                            "portable - use stringdef instead\n",
                            t->file, t->line_number);
                } else {
                    error1(t, "Non-ASCII literal strings aren't "
                              "portable - use stringdef instead");
                }
            }
            t->b = add_to_b(t->b, 1, p + c - 1);
        }
    }
@@ -171,7 +247,7 @@ static int next_token(struct tokeniser * t) {
    int c = t->c;
    int ch;
    int code = -1;
    repeat {
    while (true) {
        if (c >= SIZE(p)) { t->c = c; return -1; }
        ch = p[c];
        if (white_space(t, ch)) { c++; continue; }
@@ -179,7 +255,7 @@ static int next_token(struct tokeniser * t) {
            int c0 = c;
            while (c < SIZE(p) && (isalnum(p[c]) || p[c] == '_')) c++;
            code = find_word(c - c0, p + c0);
            if (code < 0) {
            if (code < 0 || t->token_disabled[code]) {
                t->b = move_to_b(t->b, c - c0, p + c0);
                code = c_name;
            }
@@ -218,10 +294,9 @@ static int next_char(struct tokeniser * t) {
 }

 static int next_real_char(struct tokeniser * t) {
    repeat {
    while (true) {
        int ch = next_char(t);
        if (white_space(t, ch)) continue;
        return ch;
        if (!white_space(t, ch)) return ch;
    }
 }

@@ -230,7 +305,7 @@ static void read_chars(struct tokeniser * t) {
    if (ch < 0) { error2(t, "stringdef"); return; }
    {
        int c0 = t->c-1;
        repeat {
        while (true) {
            ch = next_char(t);
            if (white_space(t, ch) || ch < 0) break;
        }
@@ -246,19 +321,20 @@ static int decimal_to_num(int ch) {
 /* Convert a hexadecimal digit character to its numeric value (0 to 15).
  * Letters are accepted in either case; any other input gives -1. */
 static int hex_to_num(int ch) {
    int value = -1;
    if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
        value = 10 + (ch - 'A');
    }
    return value;
 }

 static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
    int c = 0; int d = 0;
    repeat {
    while (true) {
        while (c < SIZE(p) && p[c] == ' ') c++;
        if (c == SIZE(p)) break;
        {
            int number = 0;
            repeat {
            while (c != SIZE(p)) {
                int ch = p[c];
                if (c == SIZE(p) || ch == ' ') break;
                if (ch == ' ') break;
                if (base == 10) {
                    ch = decimal_to_num(ch);
                    if (ch < 0) {
@@ -266,7 +342,7 @@ static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
                        return;
                    }
                } else {
                    ch = hex_to_num(tolower(ch));
                    ch = hex_to_num(ch);
                    if (ch < 0) {
                        error1(t, "hex string contains non-hex characters");
                        return;
@@ -275,18 +351,18 @@ static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
                number = base * number + ch;
                c++;
            }
            if (t->widechars || t->utf8) {
                unless (0 <= number && number <= 0xffff) {
                    error1(t, "character values exceed 64K");
            if (t->encoding == ENC_SINGLEBYTE) {
                if (number < 0 || number > 0xff) {
                    error1(t, "character values exceed 256");
                    return;
                }
            } else {
                unless (0 <= number && number <= 0xff) {
                    error1(t, "character values exceed 256");
                if (number < 0 || number > 0xffff) {
                    error1(t, "character values exceed 64K");
                    return;
                }
            }
            if (t->utf8)
            if (t->encoding == ENC_UTF8)
                d += put_utf8(number, p + d);
            else
                p[d++] = number;
@@ -300,104 +376,118 @@ extern int read_token(struct tokeniser * t) {
    int held = t->token_held;
    t->token_held = false;
    if (held) return t->token;
    repeat {
    while (true) {
        int code = next_token(t);
        switch (code) {
            case c_comment1: /*  slash-slash comment */
               while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
               continue;
                while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
                continue;
            case c_comment2: /* slash-star comment */
               repeat {
                   if (t->c >= SIZE(p)) {
                       error1(t, "/* comment not terminated");
                       t->token = -1;
                       return -1;
                   }
                   if (p[t->c] == '\n') t->line_number++;
                   if (eq_s(t, "*/")) break;
                   t->c++;
               }
               continue;
            case c_stringescapes:
               {
                   int ch1 = next_real_char(t);
                   int ch2 = next_real_char(t);
                   if (ch2 < 0)
                       { error2(t, "stringescapes"); continue; }
                   if (ch1 == '\'')
                       { error1(t, "first stringescape cannot be '"); continue; }
                   t->m_start = ch1;
                   t->m_end = ch2;
               }
               continue;
            case c_stringdef:
               {
                   int base = 0;
                   read_chars(t);
                   code = read_token(t);
                   if (code == c_hex) { base = 16; code = read_token(t); } else
                   if (code == c_decimal) { base = 10; code = read_token(t); }
                   unless (code == c_literalstring)
                       { error1(t, "string omitted after stringdef"); continue; }
                   if (base > 0) convert_numeric_string(t, t->b, base);
                   {   NEW(m_pair, q);
                       q->next = t->m_pairs;
                       q->name = copy_b(t->b2);
                       q->value = copy_b(t->b);
                       t->m_pairs = q;
                   }
               }
               continue;
                while (true) {
                    if (t->c >= SIZE(p)) {
                        error1(t, "/* comment not terminated");
                        t->token = -1;
                        return -1;
                    }
                    if (p[t->c] == '\n') t->line_number++;
                    if (eq_s(t, "*/")) break;
                    t->c++;
                }
                continue;
            case c_stringescapes: {
                int ch1 = next_real_char(t);
                int ch2 = next_real_char(t);
                if (ch2 < 0) {
                    error2(t, "stringescapes");
                    continue;
                }
                if (ch1 == '\'') {
                    error1(t, "first stringescape cannot be '");
                    continue;
                }
                t->m_start = ch1;
                t->m_end = ch2;
                continue;
            }
            case c_stringdef: {
                int base = 0;
                read_chars(t);
                code = read_token(t);
                if (code == c_hex) { base = 16; code = read_token(t); } else
                if (code == c_decimal) { base = 10; code = read_token(t); }
                if (code != c_literalstring) {
                    error1(t, "string omitted after stringdef");
                    continue;
                }
                if (base > 0) convert_numeric_string(t, t->b, base);
                {   NEW(m_pair, q);
                    q->next = t->m_pairs;
                    q->name = copy_b(t->b2);
                    q->value = copy_b(t->b);
                    t->m_pairs = q;
                    if (t->uplusmode != UPLUS_DEFINED &&
                        (SIZE(t->b2) >= 3 && t->b2[0] == 'U' && t->b2[1] == '+')) {
                        if (t->uplusmode == UPLUS_UNICODE) {
                            error1(t, "U+xxxx already used with implicit meaning");
                        } else {
                            t->uplusmode = UPLUS_DEFINED;
                        }
                    }
                }
                continue;
            }
            case c_get:
               code = read_token(t);
               unless (code == c_literalstring) {
                   error1(t, "string omitted after get"); continue;
               }
               t->get_depth++;
               if (t->get_depth > 10) {
                   fprintf(stderr, "get directives go 10 deep. Looping?\n");
                   exit(1);
               }
               {
                   char * file;
                   NEW(input, q);
                   symbol * u = get_input(t->b, &file);
                   if (u == 0) {
                       struct include * r = t->includes;
                       until (r == 0) {
                           symbol * b = copy_b(r->b);
                           b = add_to_b(b, SIZE(t->b), t->b);
                           u = get_input(b, &file);
                           lose_b(b);
                           unless (u == 0) break;
                           r = r->next;
                       }
                   }
                   if (u == 0) {
                       error(t, "Can't get '", SIZE(t->b), t->b, "'");
                       exit(1);
                   }
                   memmove(q, t, sizeof(struct input));
                   t->next = q;
                   t->p = u;
                   t->c = 0;
                   t->file = file;
                   t->line_number = 1;
               }
               p = t->p;
               continue;
                code = read_token(t);
                if (code != c_literalstring) {
                    error1(t, "string omitted after get"); continue;
                }
                t->get_depth++;
                if (t->get_depth > 10) {
                    fprintf(stderr, "get directives go 10 deep. Looping?\n");
                    exit(1);
                }
                {
                    NEW(input, q);
                    char * file = b_to_s(t->b);
                    symbol * u = get_input(file);
                    if (u == 0) {
                        struct include * r;
                        for (r = t->includes; r; r = r->next) {
                            symbol * b = copy_b(r->b);
                            b = add_to_b(b, SIZE(t->b), t->b);
                            free(file);
                            file = b_to_s(b);
                            u = get_input(file);
                            lose_b(b);
                            if (u != 0) break;
                        }
                    }
                    if (u == 0) {
                        error(t, "Can't get '", SIZE(t->b), t->b, "'");
                        exit(1);
                    }
                    memmove(q, t, sizeof(struct input));
                    t->next = q;
                    t->p = u;
                    t->c = 0;
                    t->file = file;
                    t->file_needs_freeing = true;
                    t->line_number = 1;
                }
                p = t->p;
                continue;
            case -1:
               unless (t->next == 0) {
                   lose_b(p);
                   {
                       struct input * q = t->next;
                       memmove(t, q, sizeof(struct input)); p = t->p;
                       FREE(q);
                   }
                   t->get_depth--;
                   continue;
               }
               /* drop through */
                if (t->next) {
                    lose_b(p);
                    {
                        struct input * q = t->next;
                        memmove(t, q, sizeof(struct input)); p = t->p;
                        FREE(q);
                    }
                    t->get_depth--;
                    continue;
                }
                /* fall through */
            default:
                t->previous_token = t->token;
                t->token = code;
@@ -425,12 +515,17 @@ extern const char * name_of_token(int code) {
    }
 }

 /* Mark token `code` as disabled by setting its slot in t->token_disabled
  * to 1.  No range check is performed on `code` here, so the caller must
  * pass an index that is valid for the token_disabled array. */
 extern void disable_token(struct tokeniser * t, int code) {
    t->token_disabled[code] = 1;
 }

 extern struct tokeniser * create_tokeniser(symbol * p, char * file) {
    NEW(tokeniser, t);
    t->next = 0;
    t->p = p;
    t->c = 0;
    t->file = file;
    t->file_needs_freeing = false;
    t->line_number = 1;
    t->b = create_b(0);
    t->b2 = create_b(0);
@@ -441,6 +536,8 @@ extern struct tokeniser * create_tokeniser(symbol * p, char * file) {
    t->token_held = false;
    t->token = -2;
    t->previous_token = -2;
    t->uplusmode = UPLUS_NONE;
    memset(t->token_disabled, 0, sizeof(t->token_disabled));
    return t;
 }

@@ -449,7 +546,7 @@ extern void close_tokeniser(struct tokeniser * t) {
    lose_b(t->b2);
    {
        struct m_pair * q = t->m_pairs;
        until (q == 0) {
        while (q) {
            struct m_pair * q_next = q->next;
            lose_b(q->name);
            lose_b(q->value);
@@ -459,12 +556,12 @@ extern void close_tokeniser(struct tokeniser * t) {
    }
    {
        struct input * q = t->next;
        until (q == 0) {
        while (q) {
            struct input * q_next = q->next;
            FREE(q);
            q = q_next;
        }
    }
    free(t->file);
    if (t->file_needs_freeing) free(t->file);
    FREE(t);
 }
--- a/contrib/snowball/examples/stemwords.c
+++ b/contrib/snowball/examples/stemwords.c
@@ -1,209 +0,0 @@
 /* This is a simple program which uses libstemmer to provide a command
 * line interface for stemming using any of the algorithms provided.
 */

 #include <stdio.h>
 #include <stdlib.h> /* for malloc, free */
 #include <string.h> /* for memmove */
 #include <ctype.h>  /* for isupper, tolower */

 #include "libstemmer.h"

 const char * progname;
 static int pretty = 1;

 /* Stem every line of f_in and write the results to f_out.
  *
  * Each input line (terminated by '\n' or EOF) is treated as one word.
  * Upper-case bytes are lower-cased before stemming.  What is written
  * depends on the file-level `pretty` setting:
  *   0 - the stem only;
  *   1 - "<word> -> <stem>";
  *   2 - the word padded to a 30 character column, then the stem (the
  *       padding is only emitted when the stem is non-empty).
  *
  * The line buffer grows in steps of INC symbols.  If the initial
  * allocation fails, or the stemmer reports failure, the program prints
  * "Out of memory" and exits with status 1.  If growing the buffer fails,
  * the function stops reading and returns without a message.
  */
 static void
 stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
 {
 #define INC 10
    int lim = INC;
    sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));

    /* Without this check a failed allocation would be written through
     * below (b[i] = ch) as soon as the first character arrived. */
    if (b == NULL) {
        fprintf(stderr, "Out of memory");
        exit(1);
    }

    while (1) {
        const sb_symbol * stemmed;
        int i = 0;      /* number of bytes stored in b for this line */
        int inlen = 0;  /* number of UTF-8 characters in this line */
        int ch = getc(f_in);

        if (ch == EOF) break;

        while (ch != '\n' && ch != EOF) {
            if (i == lim) {
                sb_symbol * newb = (sb_symbol *)
                    realloc(b, (lim + INC) * sizeof(sb_symbol));
                /* b is still valid on failure; it is freed below. */
                if (newb == NULL) goto done;
                b = newb;
                lim = lim + INC;
            }
            /* Update count of utf-8 characters: continuation bytes
             * (0x80..0xBF) do not start a new character. */
            if (ch < 0x80 || ch > 0xBF) inlen += 1;
            /* force lower case: */
            if (isupper(ch)) ch = tolower(ch);

            b[i] = ch;
            i++;
            ch = getc(f_in);
        }

        stemmed = sb_stemmer_stem(stemmer, b, i);
        if (stemmed == NULL) {
            fprintf(stderr, "Out of memory");
            exit(1);
        }

        if (pretty == 1) {
            fwrite(b, i, 1, f_out);
            fputs(" -> ", f_out);
        } else if (pretty == 2) {
            fwrite(b, i, 1, f_out);
            if (sb_stemmer_length(stemmer) > 0) {
                int j;
                if (inlen < 30) {
                    for (j = 30 - inlen; j > 0; j--)
                        fputs(" ", f_out);
                } else {
                    /* Word too wide for the first column: put the stem
                     * on the next line, still aligned to column 30. */
                    fputs("\n", f_out);
                    for (j = 30; j > 0; j--)
                        fputs(" ", f_out);
                }
            }
        }

        fputs((const char *)stemmed, f_out);
        putc('\n', f_out);
    }
 done:
    free(b);
    return;
 }

 /** Print the command line help to stdout and terminate the program.
  *  The first line names the program using the file-level `progname`.
  *  @param n The process exit status.
  */
 static void
 usage(int n)
 {
    /* Everything after the usage line is fixed text with no formatting
     * directives, so it is written verbatim. */
    static const char help_body[] =
        "\n"
        "The input file consists of a list of words to be stemmed, one per\n"
        "line. Words should be in lower case, but (for English) A-Z letters\n"
        "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
        "used.\n"
        "\n"
        "If -c is given, the argument is the character encoding of the input\n"
        "and output files.  If it is omitted, the UTF-8 encoding is used.\n"
        "\n"
        "If -p is given the output file consists of each word of the input\n"
        "file followed by \"->\" followed by its stemmed equivalent.\n"
        "If -p2 is given the output file is a two column layout containing\n"
        "the input words in the first column and the stemmed equivalents in\n"
        "the second column.\n"
        "Otherwise, the output file consists of the stemmed words, one per\n"
        "line.\n"
        "\n"
        "-h displays this help\n";

    printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n",
           progname);
    fputs(help_body, stdout);
    exit(n);
 }

 /* Return the value that follows option `opt` on the command line and
  * advance *pos past it.  If the command line ends before the value,
  * report "<opt> requires an argument" on stderr and exit with status 1.
  */
 static char *
 option_value(const char * opt, int argc, char * argv[], int * pos)
 {
    if (*pos >= argc) {
        fprintf(stderr, "%s requires an argument\n", opt);
        exit(1);
    }
    return argv[(*pos)++];
 }

 /* Entry point: parse the options, open the input and output streams
  * (stdin/stdout when -i/-o are absent), create a stemmer for the chosen
  * language and encoding, and run stem_file() over the input.
  * Exits with status 1 on any option, file or stemmer-creation error.
  */
 int
 main(int argc, char * argv[])
 {
    char * in = 0;
    char * out = 0;
    char * language = "english";
    char * charenc = NULL;
    FILE * f_in;
    FILE * f_out;
    struct sb_stemmer * stemmer;
    int i = 1;

    /* Plain output unless -p or -p2 is given. */
    pretty = 0;
    progname = argv[0];

    while (i < argc) {
        char * s = argv[i++];

        if (s[0] != '-') {
            fprintf(stderr, "unexpected parameter %s\n", s);
            usage(1);
        } else if (strcmp(s, "-o") == 0) {
            out = option_value(s, argc, argv, &i);
        } else if (strcmp(s, "-i") == 0) {
            in = option_value(s, argc, argv, &i);
        } else if (strcmp(s, "-l") == 0) {
            language = option_value(s, argc, argv, &i);
        } else if (strcmp(s, "-c") == 0) {
            charenc = option_value(s, argc, argv, &i);
        } else if (strcmp(s, "-p2") == 0) {
            pretty = 2;
        } else if (strcmp(s, "-p") == 0) {
            pretty = 1;
        } else if (strcmp(s, "-h") == 0) {
            usage(0);
        } else {
            fprintf(stderr, "option %s unknown\n", s);
            usage(1);
        }
    }

    /* prepare the files */
    if (in == 0) {
        f_in = stdin;
    } else {
        f_in = fopen(in, "r");
    }
    if (f_in == 0) {
        fprintf(stderr, "file %s not found\n", in);
        exit(1);
    }
    if (out == 0) {
        f_out = stdout;
    } else {
        f_out = fopen(out, "w");
    }
    if (f_out == 0) {
        fprintf(stderr, "file %s cannot be opened\n", out);
        exit(1);
    }

    /* do the stemming process: */
    stemmer = sb_stemmer_new(language, charenc);
    if (stemmer == 0) {
        if (charenc == NULL) {
            fprintf(stderr, "language `%s' not available for stemming\n", language);
        } else {
            fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
        }
        exit(1);
    }
    stem_file(stemmer, f_in, f_out);
    sb_stemmer_delete(stemmer);

    /* Only close streams this function opened itself. */
    if (in != 0) (void) fclose(f_in);
    if (out != 0) (void) fclose(f_out);

    return 0;
 }

--- a/contrib/snowball/include/libstemmer.h
+++ b/contrib/snowball/include/libstemmer.h
@@ -32,9 +32,9 @@ const char ** sb_stemmer_list(void);
 *
 *  @param charenc The character encoding.  NULL may be passed as
 *  this value, in which case UTF-8 encoding will be assumed. Otherwise,
 *  the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1),
 *  "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian).  Note that
 *  case is significant in this parameter.
 *  the argument may be one of "UTF_8", "ISO_8859_1" (i.e. Latin 1),
 *  "ISO_8859_2" (i.e. Latin 2) or "KOI8_R" (Russian).  Note that case is
 *  significant in this parameter.
 *
 *  @return NULL if the specified algorithm is not recognised, or the
 *  algorithm is not available for the requested encoding.  Otherwise,
@@ -66,7 +66,7 @@ void                sb_stemmer_delete(struct sb_stemmer * stemmer);
 *  If an out-of-memory error occurs, this will return NULL.
 */
 const sb_symbol *   sb_stemmer_stem(struct sb_stemmer * stemmer,
 				    const sb_symbol * word, int size);
                                    const sb_symbol * word, int size);

 /** Get the length of the result of the last stemmed word.
 *  This should not be called before sb_stemmer_stem() has been called.
@@ -76,4 +76,3 @@ int                 sb_stemmer_length(struct sb_stemmer * stemmer);
 #ifdef __cplusplus
 }
 #endif

--- a/contrib/snowball/java/org/tartarus/snowball/Among.java
+++ b/contrib/snowball/java/org/tartarus/snowball/Among.java
@@ -1,31 +0,0 @@
 package org.tartarus.snowball;

 import java.lang.reflect.Method;

 /**
  * One entry of an "among" lookup table: a search string, the result code
  * returned when it matches, the index of the entry holding its longest
  * matching substring, and an optional no-argument method (resolved by
  * name via reflection) to invoke when the string matches.
  */
 public class Among {
    /** Length of the search string. */
    public final int s_size;
    /** The search string as a character array. */
    public final char[] s;
    /** Index to longest matching substring. */
    public final int substring_i;
    /** Result of the lookup. */
    public final int result;
    /** Method to use if substring matches, or null when none was named. */
    public final Method method;
    /** Object to invoke method on. */
    public final SnowballProgram methodobject;

    /**
     * @param s            the search string
     * @param substring_i  index to longest matching substring
     * @param result       result of the lookup
     * @param methodname   name of a no-argument method declared on the
     *                     class of methodobject, or "" for no method
     * @param methodobject object the method is looked up on and invoked on
     * @throws RuntimeException wrapping NoSuchMethodException when
     *                     methodname is non-empty but not declared
     */
    public Among (String s, int substring_i, int result,
                  String methodname, SnowballProgram methodobject) {
        this.s_size = s.length();
        this.s = s.toCharArray();
        this.substring_i = substring_i;
        this.result = result;
        this.methodobject = methodobject;
        this.method = lookup(methodname, methodobject);
    }

    /** Resolve methodname on target's class; "" means no method (null). */
    private static Method lookup(String methodname, SnowballProgram target) {
        if (methodname.length() == 0) {
            return null;
        }
        try {
            return target.getClass().getDeclaredMethod(methodname, new Class[0]);
        } catch (NoSuchMethodException e) {
            throw new RuntimeException(e);
        }
    }
 };
--- a/contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java
+++ b/contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java
@@ -1,432 +0,0 @@

 package org.tartarus.snowball;
 import java.lang.reflect.InvocationTargetException;

 /**
  * Base class holding the working state and primitive operations used by
  * stemmers: a mutable string buffer plus a cursor, forward and backward
  * limits, and a slice delimited by bra/ket.
  *
  * Methods with a "_b" suffix are the backward-direction variants: they
  * examine the character before the cursor and move the cursor left.
  */
 public class SnowballProgram {
    protected SnowballProgram()
    {
        current = new StringBuffer();
        setCurrent("");
    }

    /**
     * Set the current string and reset cursor, limits and slice so that
     * they span the whole of it.
     */
    public void setCurrent(String value)
    {
        current.replace(0, current.length(), value);
        cursor = 0;
        limit = current.length();
        limit_backward = 0;
        bra = cursor;
        ket = limit;
    }

    /**
     * Get the current string.
     */
    public String getCurrent()
    {
        String result = current.toString();
        // Make a new StringBuffer.  If we reuse the old one, and a user of
        // the library keeps a reference to the buffer returned (for example,
        // by converting it to a String in a way which doesn't force a copy),
        // the buffer size will not decrease, and we will risk wasting a large
        // amount of memory.
        // Thanks to Wolfram Esser for spotting this problem.
        current = new StringBuffer();
        return result;
    }

    // current string
    protected StringBuffer current;

    protected int cursor;
    protected int limit;
    protected int limit_backward;
    protected int bra;
    protected int ket;

    /**
     * Take over the state of another program.  Note that the buffer is
     * shared by reference, not copied.
     */
    protected void copy_from(SnowballProgram other)
    {
        current          = other.current;
        cursor           = other.cursor;
        limit            = other.limit;
        limit_backward   = other.limit_backward;
        bra              = other.bra;
        ket              = other.ket;
    }

    /**
     * If the character at the cursor lies in [min, max] and its bit is set
     * in the bitmap s (bit (ch - min) & 7 of s[(ch - min) >> 3]), advance
     * the cursor and return true; otherwise return false.
     */
    protected boolean in_grouping(char [] s, int min, int max)
    {
        if (cursor >= limit) return false;
        char ch = current.charAt(cursor);
        if (ch > max || ch < min) return false;
        ch -= min;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
        cursor++;
        return true;
    }

    /** Backward variant of in_grouping: tests the character before the cursor. */
    protected boolean in_grouping_b(char [] s, int min, int max)
    {
        if (cursor <= limit_backward) return false;
        char ch = current.charAt(cursor - 1);
        if (ch > max || ch < min) return false;
        ch -= min;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
        cursor--;
        return true;
    }

    /**
     * If the character at the cursor is outside [min, max], or inside but
     * with its bit clear in the bitmap s, advance the cursor and return
     * true; otherwise return false.
     */
    protected boolean out_grouping(char [] s, int min, int max)
    {
        if (cursor >= limit) return false;
        char ch = current.charAt(cursor);
        if (ch > max || ch < min) {
            cursor++;
            return true;
        }
        ch -= min;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
            cursor++;
            return true;
        }
        return false;
    }

    /** Backward variant of out_grouping: tests the character before the cursor. */
    protected boolean out_grouping_b(char [] s, int min, int max)
    {
        if (cursor <= limit_backward) return false;
        char ch = current.charAt(cursor - 1);
        if (ch > max || ch < min) {
            cursor--;
            return true;
        }
        ch -= min;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
            cursor--;
            return true;
        }
        return false;
    }

    /** Advance past the character at the cursor if it lies in [min, max]. */
    protected boolean in_range(int min, int max)
    {
        if (cursor >= limit) return false;
        char ch = current.charAt(cursor);
        if (ch > max || ch < min) return false;
        cursor++;
        return true;
    }

    /** Step back over the character before the cursor if it lies in [min, max]. */
    protected boolean in_range_b(int min, int max)
    {
        if (cursor <= limit_backward) return false;
        char ch = current.charAt(cursor - 1);
        if (ch > max || ch < min) return false;
        cursor--;
        return true;
    }

    /** Advance past the character at the cursor if it lies outside [min, max]. */
    protected boolean out_range(int min, int max)
    {
        if (cursor >= limit) return false;
        char ch = current.charAt(cursor);
        if (!(ch > max || ch < min)) return false;
        cursor++;
        return true;
    }

    /** Step back over the character before the cursor if it lies outside [min, max]. */
    protected boolean out_range_b(int min, int max)
    {
        if (cursor <= limit_backward) return false;
        char ch = current.charAt(cursor - 1);
        if (!(ch > max || ch < min)) return false;
        cursor--;
        return true;
    }

    /**
     * If the first s_size characters of s appear at the cursor (and fit
     * before limit), advance the cursor past them and return true.
     */
    protected boolean eq_s(int s_size, String s)
    {
        if (limit - cursor < s_size) return false;
        int i;
        for (i = 0; i != s_size; i++) {
            if (current.charAt(cursor + i) != s.charAt(i)) return false;
        }
        cursor += s_size;
        return true;
    }

    /**
     * If the first s_size characters of s appear immediately before the
     * cursor (and fit after limit_backward), move the cursor back over
     * them and return true.
     */
    protected boolean eq_s_b(int s_size, String s)
    {
        if (cursor - limit_backward < s_size) return false;
        int i;
        for (i = 0; i != s_size; i++) {
            if (current.charAt(cursor - s_size + i) != s.charAt(i)) return false;
        }
        cursor -= s_size;
        return true;
    }

    /** eq_s applied to the whole of s. */
    protected boolean eq_v(CharSequence s)
    {
        return eq_s(s.length(), s.toString());
    }

    /** eq_s_b applied to the whole of s. */
    protected boolean eq_v_b(CharSequence s)
    {
        return eq_s_b(s.length(), s.toString());
    }

    /**
     * Look up the text at the cursor in the table v.
     *
     * The first loop is a binary search over v[0..v_size), which presumably
     * requires v to be ordered by search string (TODO confirm against the
     * table generator).  common_i and common_j track how many leading
     * characters are already known to match at each end so they are not
     * compared again.  The second loop then walks the substring_i chain
     * from the candidate found, returning the result of the first entry
     * that matches fully and whose method (if any) yields true.
     *
     * On a match the cursor is left just after the matched string.
     *
     * @return the matching entry's result, or 0 if nothing matches
     */
    protected int find_among(Among v[], int v_size)
    {
        int i = 0;
        int j = v_size;

        int c = cursor;
        int l = limit;

        int common_i = 0;
        int common_j = 0;

        boolean first_key_inspected = false;

        while (true) {
            int k = i + ((j - i) >> 1);
            int diff = 0;
            int common = common_i < common_j ? common_i : common_j; // smaller
            Among w = v[k];
            int i2;
            for (i2 = common; i2 < w.s_size; i2++) {
                if (c + common == l) {
                    diff = -1;
                    break;
                }
                diff = current.charAt(c + common) - w.s[i2];
                if (diff != 0) break;
                common++;
            }
            if (diff < 0) {
                j = k;
                common_j = common;
            } else {
                i = k;
                common_i = common;
            }
            if (j - i <= 1) {
                if (i > 0) break; // v->s has been inspected
                if (j == i) break; // only one item in v

                // - but now we need to go round once more to get
                // v->s inspected. This looks messy, but is actually
                // the optimal approach.

                if (first_key_inspected) break;
                first_key_inspected = true;
            }
        }
        while (true) {
            Among w = v[i];
            if (common_i >= w.s_size) {
                cursor = c + w.s_size;
                if (w.method == null) return w.result;
                boolean res;
                try {
                    Object resobj = w.method.invoke(w.methodobject,
                                                    new Object[0]);
                    res = resobj.toString().equals("true");
                } catch (InvocationTargetException e) {
                    res = false;
                    // FIXME - debug message
                } catch (IllegalAccessException e) {
                    res = false;
                    // FIXME - debug message
                }
                // The invoked method may have moved the cursor; restore it.
                cursor = c + w.s_size;
                if (res) return w.result;
            }
            i = w.substring_i;
            if (i < 0) return 0;
        }
    }

    // find_among_b is for backwards processing. Same comments apply
    protected int find_among_b(Among v[], int v_size)
    {
        int i = 0;
        int j = v_size;

        int c = cursor;
        int lb = limit_backward;

        int common_i = 0;
        int common_j = 0;

        boolean first_key_inspected = false;

        while (true) {
            int k = i + ((j - i) >> 1);
            int diff = 0;
            int common = common_i < common_j ? common_i : common_j;
            Among w = v[k];
            int i2;
            for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
                if (c - common == lb) {
                    diff = -1;
                    break;
                }
                diff = current.charAt(c - 1 - common) - w.s[i2];
                if (diff != 0) break;
                common++;
            }
            if (diff < 0) {
                j = k;
                common_j = common;
            } else {
                i = k;
                common_i = common;
            }
            if (j - i <= 1) {
                if (i > 0) break;
                if (j == i) break;
                if (first_key_inspected) break;
                first_key_inspected = true;
            }
        }
        while (true) {
            Among w = v[i];
            if (common_i >= w.s_size) {
                cursor = c - w.s_size;
                if (w.method == null) return w.result;

                boolean res;
                try {
                    Object resobj = w.method.invoke(w.methodobject,
                                                    new Object[0]);
                    res = resobj.toString().equals("true");
                } catch (InvocationTargetException e) {
                    res = false;
                    // FIXME - debug message
                } catch (IllegalAccessException e) {
                    res = false;
                    // FIXME - debug message
                }
                // The invoked method may have moved the cursor; restore it.
                cursor = c - w.s_size;
                if (res) return w.result;
            }
            i = w.substring_i;
            if (i < 0) return 0;
        }
    }

    /* to replace chars between c_bra and c_ket in current by the
     * chars in s.
     *
     * limit is shifted by the change in length; a cursor at or after
     * c_ket is shifted with it, and a cursor strictly inside the replaced
     * span is moved to c_bra.  Returns the change in length.
     */
    protected int replace_s(int c_bra, int c_ket, String s)
    {
        int adjustment = s.length() - (c_ket - c_bra);
        current.replace(c_bra, c_ket, s);
        limit += adjustment;
        if (cursor >= c_ket) cursor += adjustment;
        else if (cursor > c_bra) cursor = c_bra;
        return adjustment;
    }

    /**
     * Sanity-check the slice bounds (0 <= bra <= ket <= limit <= length).
     * A violation is only reported on System.err; execution continues.
     */
    protected void slice_check()
    {
        if (bra < 0 ||
            bra > ket ||
            ket > limit ||
            limit > current.length())   // this line could be removed
        {
            System.err.println("faulty slice operation");
        // FIXME: report error somehow.
        /*
            fprintf(stderr, "faulty slice operation:\n");
            debug(z, -1, 0);
            exit(1);
            */
        }
    }

    /** Replace the slice [bra, ket) with s. */
    protected void slice_from(String s)
    {
        slice_check();
        replace_s(bra, ket, s);
    }

    protected void slice_from(CharSequence s)
    {
        slice_from(s.toString());
    }

    /** Delete the slice [bra, ket). */
    protected void slice_del()
    {
        slice_from("");
    }

    /**
     * Replace [c_bra, c_ket) with s, shifting bra and ket by the change in
     * length when they lie at or after c_bra.
     */
    protected void insert(int c_bra, int c_ket, String s)
    {
        int adjustment = replace_s(c_bra, c_ket, s);
        if (c_bra <= bra) bra += adjustment;
        if (c_bra <= ket) ket += adjustment;
    }

    protected void insert(int c_bra, int c_ket, CharSequence s)
    {
        insert(c_bra, c_ket, s.toString());
    }

    /* Copy the slice into the supplied StringBuffer */
    protected StringBuffer slice_to(StringBuffer s)
    {
        slice_check();
        s.replace(0, s.length(), current.substring(bra, ket));
        return s;
    }

    /* Copy the slice into the supplied StringBuilder */
    protected StringBuilder slice_to(StringBuilder s)
    {
        slice_check();
        s.replace(0, s.length(), current.substring(bra, ket));
        return s;
    }

    /* Copy the current string up to limit into the supplied StringBuffer */
    protected StringBuffer assign_to(StringBuffer s)
    {
        s.replace(0, s.length(), current.substring(0, limit));
        return s;
    }

    /* Copy the current string up to limit into the supplied StringBuilder */
    protected StringBuilder assign_to(StringBuilder s)
    {
        s.replace(0, s.length(), current.substring(0, limit));
        return s;
    }

 /*
 extern void debug(struct SN_env * z, int number, int line_count)
 {   int i;
    int limit = SIZE(z->p);
    //if (number >= 0) printf("%3d (line %4d): '", number, line_count);
    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
    for (i = 0; i <= limit; i++)
    {   if (z->lb == i) printf("{");
        if (z->bra == i) printf("[");
        if (z->c == i) printf("|");
        if (z->ket == i) printf("]");
        if (z->l == i) printf("}");
        if (i < limit)
        {   int ch = z->p[i];
            if (ch == 0) ch = '#';
            printf("%c", ch);
        }
    }
    printf("'\n");
 }
 */

 };
--- a/contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java
+++ b/contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java
@@ -1,7 +0,0 @@

 package org.tartarus.snowball;
 import java.lang.reflect.InvocationTargetException;

 /**
  * Abstract base class for stemmers: extends SnowballProgram with a single
  * abstract stem() operation that concrete stemmers must implement.
  */
 public abstract class SnowballStemmer extends SnowballProgram {
    /**
     * Run the stemming algorithm on the program's current state.
     * NOTE(review): the meaning of the boolean result is not visible
     * here - confirm against the concrete stemmers.
     */
    public abstract boolean stem();
 };
--- a/contrib/snowball/java/org/tartarus/snowball/TestApp.java
+++ b/contrib/snowball/java/org/tartarus/snowball/TestApp.java
@@ -1,77 +0,0 @@

 package org.tartarus.snowball;

 import java.lang.reflect.Method;
 import java.io.Reader;
 import java.io.Writer;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.OutputStream;
 import java.io.FileOutputStream;

 /**
  * Command line driver: loads the stemmer class
  * org.tartarus.snowball.ext.&lt;algorithm&gt;Stemmer, reads whitespace
  * separated words from the input file, lower-cases them, and writes one
  * stem per line to the output file (or to standard output).
  */
 public class TestApp {
    private static void usage()
    {
        System.err.println("Usage: TestApp <algorithm> <input file> [-o <output file>]");
    }

    /**
     * Stem the word collected in {@code word} and write the result followed
     * by a newline, then empty the buffer.  Does nothing for an empty buffer.
     *
     * @param repeat how many times stem() is applied before the result is read
     */
    private static void emitStem(SnowballStemmer stemmer, StringBuffer word,
                                 int repeat, Writer output)
        throws java.io.IOException
    {
        if (word.length() == 0) {
            return;
        }
        stemmer.setCurrent(word.toString());
        for (int i = repeat; i != 0; i--) {
            stemmer.stem();
        }
        output.write(stemmer.getCurrent());
        output.write('\n');
        word.delete(0, word.length());
    }

    /**
     * Arguments: &lt;algorithm&gt; &lt;input file&gt; [-o &lt;output file&gt; [repeat]].
     * The optional fifth argument is the number of times each word is stemmed.
     */
    public static void main(String [] args) throws Throwable {
        if (args.length < 2) {
            usage();
            return;
        }

        Class stemClass = Class.forName("org.tartarus.snowball.ext." +
                                        args[0] + "Stemmer");
        SnowballStemmer stemmer = (SnowballStemmer) stemClass.newInstance();

        Reader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(args[1])));
        try {
            OutputStream outstream;
            if (args.length > 2) {
                if (args.length >= 4 && args[2].equals("-o")) {
                    outstream = new FileOutputStream(args[3]);
                } else {
                    usage();
                    return;
                }
            } else {
                outstream = System.out;
            }
            Writer output = new BufferedWriter(new OutputStreamWriter(outstream));
            try {
                int repeat = 1;
                if (args.length > 4) {
                    repeat = Integer.parseInt(args[4]);
                }

                StringBuffer input = new StringBuffer();
                int character;
                while ((character = reader.read()) != -1) {
                    char ch = (char) character;
                    if (Character.isWhitespace(ch)) {
                        emitStem(stemmer, input, repeat, output);
                    } else {
                        input.append(Character.toLowerCase(ch));
                    }
                }
                // A final word that is not followed by whitespace would
                // otherwise be silently dropped.
                emitStem(stemmer, input, repeat, output);
                output.flush();
            } finally {
                // Close files this method opened, but leave System.out open.
                if (outstream != System.out) {
                    output.close();
                }
            }
        } finally {
            reader.close();
        }
    }
 }
--- a/contrib/snowball/libstemmer/libstemmer_c.in
+++ b/contrib/snowball/libstemmer/libstemmer_c.in
@@ -22,10 +22,10 @@ sb_stemmer_list(void)
 static stemmer_encoding_t
 sb_getenc(const char * charenc)
 {
    struct stemmer_encoding * encoding;
    const struct stemmer_encoding * encoding;
    if (charenc == NULL) return ENC_UTF_8;
    for (encoding = encodings; encoding->name != 0; encoding++) {
 	if (strcmp(encoding->name, charenc) == 0) break;
        if (strcmp(encoding->name, charenc) == 0) break;
    }
    if (encoding->name == NULL) return ENC_UNKNOWN;
    return encoding->enc;
@@ -35,14 +35,14 @@ extern struct sb_stemmer *
 sb_stemmer_new(const char * algorithm, const char * charenc)
 {
    stemmer_encoding_t enc;
    struct stemmer_modules * module;
    const struct stemmer_modules * module;
    struct sb_stemmer * stemmer;

    enc = sb_getenc(charenc);
    if (enc == ENC_UNKNOWN) return NULL;

    for (module = modules; module->name != 0; module++) {
 	if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
        if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
    }
    if (module->name == NULL) return NULL;
    
@@ -67,9 +67,10 @@ void
 sb_stemmer_delete(struct sb_stemmer * stemmer)
 {
    if (stemmer == 0) return;
    if (stemmer->close == 0) return;
    stemmer->close(stemmer->env);
    stemmer->close = 0;
    if (stemmer->close) {
        stemmer->close(stemmer->env);
        stemmer->close = 0;
    }
    free(stemmer);
 }

--- a/contrib/snowball/libstemmer/mkmodules.pl
+++ b/contrib/snowball/libstemmer/mkmodules.pl
@@ -1,10 +1,12 @@
 #!/usr/bin/perl -w
 #!/usr/bin/env perl
 use strict;
 use 5.006;
 use warnings;

 my $progname = $0;

 if (scalar @ARGV < 4 || scalar @ARGV > 5) {
  print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<extn>]\n";
  print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<enc>]\n";
  exit 1;
 }

@@ -12,9 +14,11 @@ my $outname = shift(@ARGV);
 my $c_src_dir = shift(@ARGV);
 my $descfile = shift(@ARGV);
 my $srclistfile = shift(@ARGV);
 my $enc_only;
 my $extn = '';
 if (@ARGV) {
  $extn = '_'.shift(@ARGV);
  $enc_only = shift(@ARGV);
  $extn = '_'.$enc_only;
 }

 my %aliases = ();
@@ -27,6 +31,14 @@ sub addalgenc($$) {
  my $alg = shift();
  my $enc = shift();

  if (defined $enc_only) {
      my $norm_enc = lc $enc;
      $norm_enc =~ s/_//g;
      if ($norm_enc ne $enc_only) {
 	  return;
      }
  }

  if (defined $algorithm_encs{$alg}) {
      my $hashref = $algorithm_encs{$alg};
      $$hashref{$enc}=1;
@@ -42,7 +54,7 @@ sub readinput()
 {
    open DESCFILE, $descfile;
    my $line;
    while($line = <DESCFILE>)
    while ($line = <DESCFILE>)
    {
        next if $line =~ m/^\s*#/;
        next if $line =~ m/^\s*$/;
@@ -123,7 +135,7 @@ struct stemmer_encoding {
  const char * name;
  stemmer_encoding_t enc;
 };
 static struct stemmer_encoding encodings[] = {
 static const struct stemmer_encoding encodings[] = {
 EOS
    for $enc (sort keys %encs) {
        print OUT "  {\"${enc}\", ENC_${enc}},\n";
@@ -139,7 +151,7 @@ struct stemmer_modules {
  void (*close)(struct SN_env *);
  int (*stem)(struct SN_env *);
 };
 static struct stemmer_modules modules[] = {
 static const struct stemmer_modules modules[] = {
 EOS

    for $lang (sort keys %aliases) {
@@ -162,7 +174,6 @@ static const char * algorithm_names[] = {
 EOS

    for $lang (@algorithms) {
        my $l = $aliases{$lang};
        print OUT "  \"$lang\", \n";
    }

--- a/contrib/snowball/libstemmer/modules.txt
+++ b/contrib/snowball/libstemmer/modules.txt
@@ -9,27 +9,35 @@
 # List all the main algorithms for each language, in UTF-8, and also with
 # the most commonly used encoding.

 danish          UTF_8,ISO_8859_1        danish,da,dan
 dutch           UTF_8,ISO_8859_1        dutch,nl,dut,nld
 english         UTF_8,ISO_8859_1        english,en,eng
 finnish         UTF_8,ISO_8859_1        finnish,fi,fin
 french          UTF_8,ISO_8859_1        french,fr,fre,fra
 german          UTF_8,ISO_8859_1        german,de,ger,deu
 hungarian       UTF_8,ISO_8859_2        hungarian,hu,hun
 italian         UTF_8,ISO_8859_1        italian,it,ita
 norwegian       UTF_8,ISO_8859_1        norwegian,no,nor
 portuguese      UTF_8,ISO_8859_1        portuguese,pt,por
 romanian        UTF_8,ISO_8859_2        romanian,ro,rum,ron
 russian         UTF_8,KOI8_R            russian,ru,rus
 spanish         UTF_8,ISO_8859_1        spanish,es,esl,spa
 swedish         UTF_8,ISO_8859_1        swedish,sv,swe
 arabic          UTF_8                   arabic,ar,ara
 danish          UTF_8        danish,da,dan
 dutch           UTF_8        dutch,nl,dut,nld
 english         UTF_8        english,en,eng
 finnish         UTF_8        finnish,fi,fin
 french          UTF_8        french,fr,fre,fra
 german          UTF_8        german,de,ger,deu
 greek           UTF_8                   greek,el,gre,ell
 hindi           UTF_8                   hindi,hi,hin
 hungarian       UTF_8        hungarian,hu,hun
 indonesian      UTF_8        indonesian,id,ind
 italian         UTF_8        italian,it,ita
 lithuanian      UTF_8                   lithuanian,lt,lit
 nepali          UTF_8                   nepali,ne,nep
 norwegian       UTF_8        norwegian,no,nor
 portuguese      UTF_8        portuguese,pt,por
 romanian        UTF_8        romanian,ro,rum,ron
 russian         UTF_8            russian,ru,rus
 serbian         UTF_8                   serbian,sr,srp
 spanish         UTF_8        spanish,es,esl,spa
 swedish         UTF_8        swedish,sv,swe
 tamil           UTF_8                   tamil,ta,tam
 turkish         UTF_8                   turkish,tr,tur

 # Also include the traditional porter algorithm for english.
 # The porter algorithm is included in the libstemmer distribution to assist
 # with backwards compatibility, but for new systems the english algorithm
 # should be used in preference.
 porter          UTF_8,ISO_8859_1        porter
 porter          UTF_8        porter			english

 # Some other stemmers in the snowball project are not included in the standard
 # distribution. To compile a libstemmer with them in, add them to this list,
@@ -39,12 +47,12 @@ porter          UTF_8,ISO_8859_1        porter
 # algorithms are:
 #
 # german2          - This is a slight modification of the german stemmer.
 #german2          UTF_8,ISO_8859_1        german2
 #german2          UTF_8,ISO_8859_1        german2		german
 #
 # kraaij_pohlmann  - This is a different dutch stemmer.
 #kraaij_pohlmann  UTF_8,ISO_8859_1        kraaij_pohlmann
 #kraaij_pohlmann  UTF_8,ISO_8859_1        kraaij_pohlmann	dutch
 #
 # lovins           - This is an english stemmer, but fairly outdated, and
 #                    only really applicable to a restricted type of input text
 #                    (keywords in academic publications).
 #lovins           UTF_8,ISO_8859_1        lovins
 #lovins           UTF_8,ISO_8859_1        lovins		english
--- a/contrib/snowball/runtime/api.c
+++ b/contrib/snowball/runtime/api.c
@@ -2,7 +2,7 @@
 #include <stdlib.h> /* for calloc, free */
 #include "header.h"

 extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
 extern struct SN_env * SN_create_env(int S_size, int I_size)
 {
    struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
    if (z == NULL) return NULL;
@@ -27,12 +27,6 @@ extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
        if (z->I == NULL) goto error;
    }

    if (B_size)
    {
        z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
        if (z->B == NULL) goto error;
    }

    return z;
 error:
    SN_close_env(z, S_size);
@@ -52,7 +46,6 @@ extern void SN_close_env(struct SN_env * z, int S_size)
        free(z->S);
    }
    free(z->I);
    free(z->B);
    if (z->p) lose_s(z->p);
    free(z);
 }
@@ -63,4 +56,3 @@ extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
    z->c = 0;
    return err;
 }

--- a/contrib/snowball/runtime/api.h
+++ b/contrib/snowball/runtime/api.h
@@ -16,11 +16,17 @@ struct SN_env {
    int c; int l; int lb; int bra; int ket;
    symbol * * S;
    int * I;
    unsigned char * B;
 };

 extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
 #ifdef __cplusplus
 extern "C" {
 #endif

 extern struct SN_env * SN_create_env(int S_size, int I_size);
 extern void SN_close_env(struct SN_env * z, int S_size);

 extern int SN_set_current(struct SN_env * z, int size, const symbol * s);

 #ifdef __cplusplus
 }
 #endif
--- a/contrib/snowball/runtime/header.h
+++ b/contrib/snowball/runtime/header.h
@@ -54,5 +54,6 @@ extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
 extern symbol * slice_to(struct SN_env * z, symbol * p);
 extern symbol * assign_to(struct SN_env * z, symbol * p);

 extern void debug(struct SN_env * z, int number, int line_count);
 extern int len_utf8(const symbol * p);

 extern void debug(struct SN_env * z, int number, int line_count);
--- a/contrib/snowball/runtime/utilities.c
+++ b/contrib/snowball/runtime/utilities.c
@@ -5,8 +5,6 @@

 #include "header.h"

 #define unless(C) if(!(C))

 #define CREATE_SIZE 1

 extern symbol * create_s(void) {
@@ -15,7 +13,7 @@ extern symbol * create_s(void) {
    if (mem == NULL) return NULL;
    p = (symbol *) (HEAD + (char *) mem);
    CAPACITY(p) = CREATE_SIZE;
    SET_SIZE(p, CREATE_SIZE);
    SET_SIZE(p, 0);
    return p;
 }

@@ -27,7 +25,7 @@ extern void lose_s(symbol * p) {
 /*
   new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
   if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new
   position, or 0 on failure.
   position, or -1 on failure.

   -- used to implement hop and next in the utf8 case.
 */
@@ -66,77 +64,95 @@ extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
 /* Code for character groupings: utf8 cases */

 static int get_utf8(const symbol * p, int c, int l, int * slot) {
    int b0, b1;
    int b0, b1, b2;
    if (c >= l) return 0;
    b0 = p[c++];
    if (b0 < 0xC0 || c == l) {   /* 1100 0000 */
        * slot = b0; return 1;
        *slot = b0;
        return 1;
    }
    b1 = p[c++];
    b1 = p[c++] & 0x3F;
    if (b0 < 0xE0 || c == l) {   /* 1110 0000 */
        * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
        *slot = (b0 & 0x1F) << 6 | b1;
        return 2;
    }
    b2 = p[c++] & 0x3F;
    if (b0 < 0xF0 || c == l) {   /* 1111 0000 */
        *slot = (b0 & 0xF) << 12 | b1 << 6 | b2;
        return 3;
    }
    * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3;
    *slot = (b0 & 0xE) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F);
    return 4;
 }

 static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
    int b0, b1;
    int a, b;
    if (c <= lb) return 0;
    b0 = p[--c];
    if (b0 < 0x80 || c == lb) {   /* 1000 0000 */
        * slot = b0; return 1;
    b = p[--c];
    if (b < 0x80 || c == lb) {   /* 1000 0000 */
        *slot = b;
        return 1;
    }
    b1 = p[--c];
    if (b1 >= 0xC0 || c == lb) {   /* 1100 0000 */
        * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
    a = b & 0x3F;
    b = p[--c];
    if (b >= 0xC0 || c == lb) {   /* 1100 0000 */
        *slot = (b & 0x1F) << 6 | a;
        return 2;
    }
    * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
    a |= (b & 0x3F) << 6;
    b = p[--c];
    if (b >= 0xE0 || c == lb) {   /* 1110 0000 */
        *slot = (b & 0xF) << 12 | a;
        return 3;
    }
    *slot = (p[--c] & 0xE) << 18 | (b & 0x3F) << 12 | a;
    return 4;
 }

 extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	int w = get_utf8(z->p, z->c, z->l, & ch);
 	unless (w) return -1;
 	if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return w;
 	z->c += w;
        int ch;
        int w = get_utf8(z->p, z->c, z->l, & ch);
        if (!w) return -1;
        if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
            return w;
        z->c += w;
    } while (repeat);
    return 0;
 }

 extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	int w = get_b_utf8(z->p, z->c, z->lb, & ch);
 	unless (w) return -1;
 	if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return w;
 	z->c -= w;
        int ch;
        int w = get_b_utf8(z->p, z->c, z->lb, & ch);
        if (!w) return -1;
        if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
            return w;
        z->c -= w;
    } while (repeat);
    return 0;
 }

 extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	int w = get_utf8(z->p, z->c, z->l, & ch);
 	unless (w) return -1;
 	unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return w;
 	z->c += w;
        int ch;
        int w = get_utf8(z->p, z->c, z->l, & ch);
        if (!w) return -1;
        if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
            return w;
        z->c += w;
    } while (repeat);
    return 0;
 }

 extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	int w = get_b_utf8(z->p, z->c, z->lb, & ch);
 	unless (w) return -1;
 	unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return w;
 	z->c -= w;
        int ch;
        int w = get_b_utf8(z->p, z->c, z->lb, & ch);
        if (!w) return -1;
        if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
            return w;
        z->c -= w;
    } while (repeat);
    return 0;
 }
@@ -145,48 +161,48 @@ extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min,

 extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	if (z->c >= z->l) return -1;
 	ch = z->p[z->c];
 	if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return 1;
 	z->c++;
        int ch;
        if (z->c >= z->l) return -1;
        ch = z->p[z->c];
        if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
            return 1;
        z->c++;
    } while (repeat);
    return 0;
 }

 extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	if (z->c <= z->lb) return -1;
 	ch = z->p[z->c - 1];
 	if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return 1;
 	z->c--;
        int ch;
        if (z->c <= z->lb) return -1;
        ch = z->p[z->c - 1];
        if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
            return 1;
        z->c--;
    } while (repeat);
    return 0;
 }

 extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	if (z->c >= z->l) return -1;
 	ch = z->p[z->c];
 	unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return 1;
 	z->c++;
        int ch;
        if (z->c >= z->l) return -1;
        ch = z->p[z->c];
        if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
            return 1;
        z->c++;
    } while (repeat);
    return 0;
 }

 extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    do {
 	int ch;
 	if (z->c <= z->lb) return -1;
 	ch = z->p[z->c - 1];
 	unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
 	    return 1;
 	z->c--;
        int ch;
        if (z->c <= z->lb) return -1;
        ch = z->p[z->c - 1];
        if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
            return 1;
        z->c--;
    } while (repeat);
    return 0;
 }
@@ -215,7 +231,7 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
    int j = v_size;

    int c = z->c; int l = z->l;
    symbol * q = z->p + c;
    const symbol * q = z->p + c;

    const struct among * w;

@@ -224,7 +240,7 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {

    int first_key_inspected = 0;

    while(1) {
    while (1) {
        int k = i + ((j - i) >> 1);
        int diff = 0;
        int common = common_i < common_j ? common_i : common_j; /* smaller */
@@ -237,8 +253,13 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
                common++;
            }
        }
        if (diff < 0) { j = k; common_j = common; }
                 else { i = k; common_i = common; }
        if (diff < 0) {
            j = k;
            common_j = common;
        } else {
            i = k;
            common_i = common;
        }
        if (j - i <= 1) {
            if (i > 0) break; /* v->s has been inspected */
            if (j == i) break; /* only one item in v */
@@ -251,7 +272,7 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
            first_key_inspected = 1;
        }
    }
    while(1) {
    while (1) {
        w = v + i;
        if (common_i >= w->s_size) {
            z->c = c + w->s_size;
@@ -275,7 +296,7 @@ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
    int j = v_size;

    int c = z->c; int lb = z->lb;
    symbol * q = z->p + c - 1;
    const symbol * q = z->p + c - 1;

    const struct among * w;

@@ -284,7 +305,7 @@ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {

    int first_key_inspected = 0;

    while(1) {
    while (1) {
        int k = i + ((j - i) >> 1);
        int diff = 0;
        int common = common_i < common_j ? common_i : common_j;
@@ -306,7 +327,7 @@ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
            first_key_inspected = 1;
        }
    }
    while(1) {
    while (1) {
        w = v + i;
        if (common_i >= w->s_size) {
            z->c = c - w->s_size;
@@ -367,11 +388,10 @@ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const
        z->l += adjustment;
        if (z->c >= c_ket)
            z->c += adjustment;
        else
            if (z->c > c_bra)
                z->c = c_bra;
        else if (z->c > c_bra)
            z->c = c_bra;
    }
    unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
    if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
    if (adjptr != NULL)
        *adjptr = adjustment;
    return 0;
@@ -417,12 +437,7 @@ extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbo
 }

 extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
    int adjustment;
    if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
        return -1;
    if (bra <= z->bra) z->bra += adjustment;
    if (bra <= z->ket) z->ket += adjustment;
    return 0;
    return insert_s(z, bra, ket, SIZE(p), p);
 }

 extern symbol * slice_to(struct SN_env * z, symbol * p) {
@@ -455,6 +470,16 @@ extern symbol * assign_to(struct SN_env * z, symbol * p) {
    return p;
 }

 extern int len_utf8(const symbol * p) {
    int size = SIZE(p);
    int len = 0;
    while (size--) {
        symbol b = *p++;
        if (b >= 0xC0 || b < 0x80) ++len;
    }
    return len;
 }

 #if 0
 extern void debug(struct SN_env * z, int number, int line_count) {
    int i;