Browse Source

Remove bloody submodules.

tags/1.1.0
Vsevolod Stakhov 8 years ago
parent
commit
2375dba898
83 changed files with 16245 additions and 15 deletions
  1. 0
    12
      .gitmodules
  2. 0
    1
      contrib/snowball
  3. 5
    0
      contrib/snowball/.gitignore
  4. 4
    0
      contrib/snowball/.travis.yml
  5. 27
    0
      contrib/snowball/AUTHORS
  6. 85
    0
      contrib/snowball/CMakeLists.txt
  7. 300
    0
      contrib/snowball/GNUmakefile
  8. 5
    0
      contrib/snowball/README
  9. 91
    0
      contrib/snowball/algorithms/danish/stem_ISO_8859_1.sbl
  10. 91
    0
      contrib/snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl
  11. 164
    0
      contrib/snowball/algorithms/dutch/stem_ISO_8859_1.sbl
  12. 164
    0
      contrib/snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
  13. 229
    0
      contrib/snowball/algorithms/english/stem_ISO_8859_1.sbl
  14. 196
    0
      contrib/snowball/algorithms/finnish/stem_ISO_8859_1.sbl
  15. 248
    0
      contrib/snowball/algorithms/french/stem_ISO_8859_1.sbl
  16. 239
    0
      contrib/snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl
  17. 139
    0
      contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl
  18. 139
    0
      contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl
  19. 145
    0
      contrib/snowball/algorithms/german2/stem_ISO_8859_1.sbl
  20. 241
    0
      contrib/snowball/algorithms/hungarian/stem_ISO_8859_2.sbl
  21. 241
    0
      contrib/snowball/algorithms/hungarian/stem_Unicode.sbl
  22. 195
    0
      contrib/snowball/algorithms/italian/stem_ISO_8859_1.sbl
  23. 195
    0
      contrib/snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl
  24. 245
    0
      contrib/snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
  25. 208
    0
      contrib/snowball/algorithms/lovins/stem_ISO_8859_1.sbl
  26. 80
    0
      contrib/snowball/algorithms/norwegian/stem_ISO_8859_1.sbl
  27. 80
    0
      contrib/snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
  28. 139
    0
      contrib/snowball/algorithms/porter/stem_ISO_8859_1.sbl
  29. 218
    0
      contrib/snowball/algorithms/portuguese/stem_ISO_8859_1.sbl
  30. 218
    0
      contrib/snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
  31. 236
    0
      contrib/snowball/algorithms/romanian/stem_ISO_8859_2.sbl
  32. 236
    0
      contrib/snowball/algorithms/romanian/stem_Unicode.sbl
  33. 217
    0
      contrib/snowball/algorithms/russian/stem_KOI8_R.sbl
  34. 215
    0
      contrib/snowball/algorithms/russian/stem_Unicode.sbl
  35. 230
    0
      contrib/snowball/algorithms/spanish/stem_ISO_8859_1.sbl
  36. 230
    0
      contrib/snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
  37. 72
    0
      contrib/snowball/algorithms/swedish/stem_ISO_8859_1.sbl
  38. 72
    0
      contrib/snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
  39. 477
    0
      contrib/snowball/algorithms/turkish/stem_Unicode.sbl
  40. 959
    0
      contrib/snowball/compiler/analyser.c
  41. 257
    0
      contrib/snowball/compiler/driver.c
  42. 1465
    0
      contrib/snowball/compiler/generator.c
  43. 1452
    0
      contrib/snowball/compiler/generator_java.c
  44. 324
    0
      contrib/snowball/compiler/header.h
  45. 263
    0
      contrib/snowball/compiler/space.c
  46. 84
    0
      contrib/snowball/compiler/syswords.h
  47. 13
    0
      contrib/snowball/compiler/syswords2.h
  48. 470
    0
      contrib/snowball/compiler/tokeniser.c
  49. 15
    0
      contrib/snowball/doc/TODO
  50. 125
    0
      contrib/snowball/doc/libstemmer_c_README
  51. 40
    0
      contrib/snowball/doc/libstemmer_java_README
  52. 209
    0
      contrib/snowball/examples/stemwords.c
  53. 79
    0
      contrib/snowball/include/libstemmer.h
  54. 31
    0
      contrib/snowball/java/org/tartarus/snowball/Among.java
  55. 432
    0
      contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java
  56. 7
    0
      contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java
  57. 77
    0
      contrib/snowball/java/org/tartarus/snowball/TestApp.java
  58. 66
    0
      contrib/snowball/runtime/api.c
  59. 26
    0
      contrib/snowball/runtime/api.h
  60. 58
    0
      contrib/snowball/runtime/header.h
  61. 478
    0
      contrib/snowball/runtime/utilities.c
  62. 0
    1
      doc/doxydown
  63. 19
    0
      doc/doxydown/.gitignore
  64. 21
    0
      doc/doxydown/LICENSE
  65. 139
    0
      doc/doxydown/README.md
  66. 388
    0
      doc/doxydown/doxydown.pl
  67. 0
    1
      interface
  68. 50
    0
      interface/README.md
  69. 24
    0
      interface/css/datatables.min.css
  70. BIN
      interface/css/glyphicons-halflings-regular.woff
  71. BIN
      interface/css/glyphicons-halflings-regular.woff2
  72. 635
    0
      interface/css/rspamd.css
  73. BIN
      interface/favicon.ico
  74. BIN
      interface/img/asc.png
  75. BIN
      interface/img/desc.png
  76. BIN
      interface/img/spinner.gif
  77. BIN
      interface/img/spinner.png
  78. 320
    0
      interface/index.html
  79. 9
    0
      interface/js/d3pie.min.js
  80. 253
    0
      interface/js/datatables.min.js
  81. 1121
    0
      interface/js/rspamd.js
  82. 2
    0
      interface/plugins.txt
  83. 18
    0
      interface/react-index.html

+ 0
- 12
.gitmodules View File

@@ -1,12 +0,0 @@
[submodule "interface"]
path = interface
url = git://github.com/vstakhov/rspamd-interface
[submodule "doc/doxydown"]
path = doc/doxydown
url = https://github.com/vstakhov/doxydown.git
[submodule "contrib/snowball"]
path = contrib/snowball
url = https://github.com/vstakhov/snowball
[submodule "contrib/siphash"]
path = contrib/siphash
url = https://github.com/vstakhov/siphash

+ 0
- 1
contrib/snowball

@@ -1 +0,0 @@
Subproject commit c381f4fa958b59d41b0c596f0cbfe3ed48831e93

+ 5
- 0
contrib/snowball/.gitignore View File

@@ -0,0 +1,5 @@
# Object files produced by the build.
*.o
# Generated files inside libstemmer/ (the ones the GNUmakefile 'clean' target
# removes). The directory itself must NOT be ignored: it also holds
# hand-written sources that the build needs (libstemmer_c.in, mkmodules.pl,
# modules.txt, modules_utf8.txt).
/libstemmer/libstemmer.c
/libstemmer/libstemmer_utf8.c
/libstemmer/modules.h
/libstemmer/modules_utf8.h
/libstemmer/mkinc.mak
/libstemmer/mkinc_utf8.mak
# Compiler binary, generated C sources and the example program.
/snowball
/src_c
/stemwords

+ 4
- 0
contrib/snowball/.travis.yml View File

@@ -0,0 +1,4 @@
language: c
compiler: gcc
# `make check` reads its test vocabularies from the directory named by
# STEMMING_DATA in the GNUmakefile (../snowball-data), so the data repository
# has to be cloned to exactly that location.
before_script: git clone https://github.com/snowballstem/snowball-data ../snowball-data
script: make check

+ 27
- 0
contrib/snowball/AUTHORS View File

@@ -0,0 +1,27 @@
Authors
=======

Martin Porter
-------------

- Designed the snowball language.
- Implemented the snowball to C compiler.
- Implemented the stemming algorithms in C.
- Wrote the documentation.

Richard Boulton
---------------

- Implemented Java backend of the snowball compiler.
- Developed build system.
- Assisted with website maintenance.


Assistance from
---------------

Olivier Bornet - fixes to java packaging and build system.
Andreas Jung - useful bug reports on the libstemmer library.
Olly Betts - several patches, bug reports, and performance improvements.
Sebastiano Vigna and Oerd Cukalla - patches for the Java stemming algorithms.
Ralf Junker - fix a potential memory leak in sb_stemmer_new().

+ 85
- 0
contrib/snowball/CMakeLists.txt View File

@@ -0,0 +1,85 @@
# cmake_minimum_required() has to run before PROJECT() so that the requested
# version and its policy defaults are in effect when the project is set up.
cmake_minimum_required(VERSION 2.8)

PROJECT(snowball C)

INCLUDE(CheckCCompilerFlag)
INCLUDE(FindPerl)

# End of configuration
SET(LIBSTEM_ALGORITHMS danish dutch english finnish french german hungarian
italian norwegian porter portuguese romanian
russian spanish swedish turkish)
SET(KOI8_ALGORITHMS russian)
SET(ISO_8859_1_ALGORITHMS danish dutch english finnish french german italian
norwegian porter portuguese spanish swedish)
SET(ISO_8859_2_ALGORITHMS hungarian romanian)
SET(OTHER_ALGORITHMS german2 kraaij_pohlmann lovins)
SET(ALL_ALGORITHMS ${LIBSTEM_ALGORITHMS} ${OTHER_ALGORITHMS})

SET(COMPILER_SOURCES compiler/space.c
compiler/tokeniser.c
compiler/analyser.c
compiler/generator.c
compiler/driver.c
compiler/generator_java.c)

SET(SNOWBALL_RUNTIME runtime/api.c
runtime/utilities.c)
SET(LIBSTEMMER_SOURCES libstemmer/libstemmer.c)
SET(LIBSTEMMER_UTF8_SOURCES libstemmer/libstemmer_utf8.c)
#LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
#LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
#LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in

SET(STEMWORDS_SOURCES examples/stemwords.c)
SET(MODULES_H "modules.h")
CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/libstemmer_c.in ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/libstemmer.c @ONLY)

# gen_stem(<algorithm list> <encoding>)
#
# For every algorithm in IN, add a build rule that runs the snowball compiler
# to produce libstemmer/stem_<ENCODING>_<algorithm>.c (and .h) in the build
# tree, and append the generated .c file to STEMMER_SOURCES.
#
# The "UTF_8" encoding reads algorithms/<algorithm>/stem_Unicode.sbl and falls
# back to stem_ISO_8859_1.sbl when no Unicode source exists. For every other
# encoding, algorithms without a matching .sbl file are silently skipped.
MACRO(gen_stem IN ENCODING)
	FOREACH(_it ${IN})
		SET(_base "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/stem_${ENCODING}_${_it}")
		SET(_header "${_base}.h")
		SET(_source "${_base}.c")
		STRING(REPLACE "UTF_8" "Unicode" _in_enc "${ENCODING}")
		SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_${_in_enc}.sbl")

		# Only the UTF-8 stemmers may be compiled with -u; the GNUmakefile
		# likewise passes -u for stem_UTF_8_* only. Single-byte encodings
		# (KOI8_R, ISO_8859_*) must be generated without it.
		IF("${ENCODING}" STREQUAL "UTF_8")
			SET(_utf8_flag "-u")
		ELSE()
			SET(_utf8_flag "")
		ENDIF()

		SET(_generate TRUE)
		IF("${_in_enc}" STREQUAL "Unicode" AND NOT EXISTS "${_input}")
			# No dedicated Unicode source: reuse the ISO-8859-1 description.
			SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_ISO_8859_1.sbl")
		ELSEIF(NOT EXISTS "${_input}")
			SET(_generate FALSE)
		ENDIF()

		IF(_generate)
			# Depend on the .sbl input as well as on the compiler so that the
			# stemmer is regenerated when the algorithm description changes.
			ADD_CUSTOM_COMMAND(OUTPUT ${_source} ${_header}
				COMMAND ${CMAKE_CURRENT_BINARY_DIR}/snowball "${_input}" -o ${_base} -eprefix ${_it}_${ENCODING}_ -r ${CMAKE_CURRENT_SOURCE_DIR}/runtime ${_utf8_flag}
				DEPENDS snowball "${_input}")
			LIST(APPEND STEMMER_SOURCES ${_source})
		ENDIF()
	ENDFOREACH()
ENDMACRO()

INCLUDE_DIRECTORIES("include")
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}/libstemmer")

ADD_EXECUTABLE(snowball ${COMPILER_SOURCES})

# Generate libstemmer/modules.h in the build tree with mkmodules.pl.
# The generator script and the module list are declared as dependencies so
# that the header is rebuilt whenever either of them changes.
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h
	COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl -f ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h ${CMAKE_CURRENT_BINARY_DIR}/libstemmer ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules.txt ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/mkinc.mak
	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules.txt)
ADD_CUSTOM_TARGET(modules DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h")

SET(STEMMER_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/libstemmer.c")
ADD_CUSTOM_TARGET(stemmer_deps ALL)
ADD_DEPENDENCIES(stemmer_deps modules)

gen_stem("${LIBSTEM_ALGORITHMS}" "UTF_8")
gen_stem("${KOI8_ALGORITHMS}" "KOI8_R")
gen_stem("${ISO_8859_1_ALGORITHMS}" "ISO_8859_1")
gen_stem("${ISO_8859_2_ALGORITHMS}" "ISO_8859_2")


ADD_LIBRARY(stemmer ${LINK_TYPE} ${SNOWBALL_RUNTIME} ${STEMMER_SOURCES})
ADD_DEPENDENCIES(stemmer stemmer_deps)

ADD_EXECUTABLE(stemwords ${STEMWORDS_SOURCES})
TARGET_LINK_LIBRARIES(stemwords stemmer)

+ 300
- 0
contrib/snowball/GNUmakefile View File

@@ -0,0 +1,300 @@
# -*- makefile -*-

c_src_dir = src_c
java_src_main_dir = java/org/tartarus/snowball
java_src_dir = $(java_src_main_dir)/ext

libstemmer_algorithms = danish dutch english finnish french german hungarian \
italian \
norwegian porter portuguese romanian \
russian spanish swedish turkish

KOI8_R_algorithms = russian
ISO_8859_1_algorithms = danish dutch english finnish french german italian \
norwegian porter portuguese spanish swedish
ISO_8859_2_algorithms = hungarian romanian

other_algorithms = german2 kraaij_pohlmann lovins

all_algorithms = $(libstemmer_algorithms) $(other_algorithms)

COMPILER_SOURCES = compiler/space.c \
compiler/tokeniser.c \
compiler/analyser.c \
compiler/generator.c \
compiler/driver.c \
compiler/generator_java.c
COMPILER_HEADERS = compiler/header.h \
compiler/syswords.h \
compiler/syswords2.h

RUNTIME_SOURCES = runtime/api.c \
runtime/utilities.c
RUNTIME_HEADERS = runtime/api.h \
runtime/header.h

JAVARUNTIME_SOURCES = java/org/tartarus/snowball/Among.java \
java/org/tartarus/snowball/SnowballProgram.java \
java/org/tartarus/snowball/SnowballStemmer.java \
java/org/tartarus/snowball/TestApp.java

LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in

STEMWORDS_SOURCES = examples/stemwords.c

ALL_ALGORITHM_FILES = $(all_algorithms:%=algorithms/%/stem*.sbl)
C_LIB_SOURCES = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c) \
$(KOI8_R_algorithms:%=$(c_src_dir)/stem_KOI8_R_%.c) \
$(ISO_8859_1_algorithms:%=$(c_src_dir)/stem_ISO_8859_1_%.c) \
$(ISO_8859_2_algorithms:%=$(c_src_dir)/stem_ISO_8859_2_%.c)
C_LIB_HEADERS = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h) \
$(KOI8_R_algorithms:%=$(c_src_dir)/stem_KOI8_R_%.h) \
$(ISO_8859_1_algorithms:%=$(c_src_dir)/stem_ISO_8859_1_%.h) \
$(ISO_8859_2_algorithms:%=$(c_src_dir)/stem_ISO_8859_2_%.h)
C_OTHER_SOURCES = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c)
C_OTHER_HEADERS = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h)
JAVA_SOURCES = $(libstemmer_algorithms:%=$(java_src_dir)/%Stemmer.java)

COMPILER_OBJECTS=$(COMPILER_SOURCES:.c=.o)
RUNTIME_OBJECTS=$(RUNTIME_SOURCES:.c=.o)
LIBSTEMMER_OBJECTS=$(LIBSTEMMER_SOURCES:.c=.o)
LIBSTEMMER_UTF8_OBJECTS=$(LIBSTEMMER_UTF8_SOURCES:.c=.o)
STEMWORDS_OBJECTS=$(STEMWORDS_SOURCES:.c=.o)
C_LIB_OBJECTS = $(C_LIB_SOURCES:.c=.o)
C_OTHER_OBJECTS = $(C_OTHER_SOURCES:.c=.o)
JAVA_CLASSES = $(JAVA_SOURCES:.java=.class)
JAVA_RUNTIME_CLASSES=$(JAVARUNTIME_SOURCES:.java=.class)

CFLAGS=-Iinclude -O2
CPPFLAGS=-W -Wall -Wmissing-prototypes -Wmissing-declarations

all: snowball libstemmer.o stemwords $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS)

clean:
rm -f $(COMPILER_OBJECTS) $(RUNTIME_OBJECTS) \
$(LIBSTEMMER_OBJECTS) $(LIBSTEMMER_UTF8_OBJECTS) $(STEMWORDS_OBJECTS) snowball \
libstemmer.o stemwords \
libstemmer/modules.h \
libstemmer/modules_utf8.h \
snowball.splint \
$(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
$(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
$(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c
rm -rf dist
rmdir $(c_src_dir) || true

snowball: $(COMPILER_OBJECTS)
$(CC) -o $@ $^

$(COMPILER_OBJECTS): $(COMPILER_HEADERS)

libstemmer/libstemmer.c: libstemmer/libstemmer_c.in
sed 's/@MODULES_H@/modules.h/' $^ >$@

libstemmer/libstemmer_utf8.c: libstemmer/libstemmer_c.in
sed 's/@MODULES_H@/modules_utf8.h/' $^ >$@

# mkmodules.pl is given the header path as its first argument and the .mak
# include path as its fourth. The header is named explicitly instead of via
# $@: in a rule with several targets $@ is whichever target triggered the
# recipe, so rebuilding only the .mak file would pass the .mak path as the
# header path.
libstemmer/modules.h libstemmer/mkinc.mak: libstemmer/mkmodules.pl libstemmer/modules.txt
	libstemmer/mkmodules.pl libstemmer/modules.h $(c_src_dir) libstemmer/modules.txt libstemmer/mkinc.mak

libstemmer/modules_utf8.h libstemmer/mkinc_utf8.mak: libstemmer/mkmodules.pl libstemmer/modules_utf8.txt
	libstemmer/mkmodules.pl libstemmer/modules_utf8.h $(c_src_dir) libstemmer/modules_utf8.txt libstemmer/mkinc_utf8.mak utf8

libstemmer/libstemmer.o: libstemmer/modules.h $(C_LIB_HEADERS)

libstemmer.o: libstemmer/libstemmer.o $(RUNTIME_OBJECTS) $(C_LIB_OBJECTS)
$(AR) -cru $@ $^

stemwords: $(STEMWORDS_OBJECTS) libstemmer.o
$(CC) -o $@ $^

algorithms/%/stem_Unicode.sbl: algorithms/%/stem_ISO_8859_1.sbl
cp $^ $@

$(c_src_dir)/stem_UTF_8_%.c $(c_src_dir)/stem_UTF_8_%.h: algorithms/%/stem_Unicode.sbl snowball
@mkdir -p $(c_src_dir)
@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
o="$(c_src_dir)/stem_UTF_8_$${l}"; \
echo "./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u"; \
./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u

$(c_src_dir)/stem_KOI8_R_%.c $(c_src_dir)/stem_KOI8_R_%.h: algorithms/%/stem_KOI8_R.sbl snowball
@mkdir -p $(c_src_dir)
@l=`echo "$<" | sed 's!\(.*\)/stem_KOI8_R.sbl$$!\1!;s!^.*/!!'`; \
o="$(c_src_dir)/stem_KOI8_R_$${l}"; \
echo "./snowball $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime"; \
./snowball $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime

$(c_src_dir)/stem_ISO_8859_1_%.c $(c_src_dir)/stem_ISO_8859_1_%.h: algorithms/%/stem_ISO_8859_1.sbl snowball
@mkdir -p $(c_src_dir)
@l=`echo "$<" | sed 's!\(.*\)/stem_ISO_8859_1.sbl$$!\1!;s!^.*/!!'`; \
o="$(c_src_dir)/stem_ISO_8859_1_$${l}"; \
echo "./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime"; \
./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime

$(c_src_dir)/stem_ISO_8859_2_%.c $(c_src_dir)/stem_ISO_8859_2_%.h: algorithms/%/stem_ISO_8859_2.sbl snowball
@mkdir -p $(c_src_dir)
@l=`echo "$<" | sed 's!\(.*\)/stem_ISO_8859_2.sbl$$!\1!;s!^.*/!!'`; \
o="$(c_src_dir)/stem_ISO_8859_2_$${l}"; \
echo "./snowball $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime"; \
./snowball $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime

$(c_src_dir)/stem_%.o: $(c_src_dir)/stem_%.c $(c_src_dir)/stem_%.h
$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<

$(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
@mkdir -p $(java_src_dir)
@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
o="$(java_src_dir)/$${l}Stemmer"; \
echo "./snowball $< -j -o $${o} -p \"org.tartarus.snowball.SnowballStemmer\" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer"; \
./snowball $< -j -o $${o} -p "org.tartarus.snowball.SnowballStemmer" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer

splint: snowball.splint
snowball.splint: $(COMPILER_SOURCES)
splint $^ >$@ -weak

# Make a full source distribution
dist: dist_snowball dist_libstemmer_c dist_libstemmer_java

# Make a distribution of all the sources involved in snowball
dist_snowball: $(COMPILER_SOURCES) $(COMPILER_HEADERS) \
$(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
$(LIBSTEMMER_SOURCES) \
$(LIBSTEMMER_UTF8_SOURCES) \
$(LIBSTEMMER_HEADERS) \
$(LIBSTEMMER_EXTRA) \
$(ALL_ALGORITHM_FILES) $(STEMWORDS_SOURCES) \
GNUmakefile README doc/TODO libstemmer/mkmodules.pl
destname=snowball_code; \
dest=dist/$${destname}; \
rm -rf $${dest} && \
rm -f $${dest}.tgz && \
for file in $^; do \
dir=`dirname $$file` && \
mkdir -p $${dest}/$${dir} && \
cp -a $${file} $${dest}/$${dir} || exit 1 ; \
done && \
(cd dist && tar zcf $${destname}.tgz $${destname}) && \
rm -rf $${dest}

# Make a distribution of all the sources required to compile the C library.
dist_libstemmer_c: \
$(RUNTIME_SOURCES) \
$(RUNTIME_HEADERS) \
$(LIBSTEMMER_SOURCES) \
$(LIBSTEMMER_UTF8_SOURCES) \
$(LIBSTEMMER_HEADERS) \
$(LIBSTEMMER_EXTRA) \
$(C_LIB_SOURCES) \
$(C_LIB_HEADERS) \
libstemmer/mkinc.mak \
libstemmer/mkinc_utf8.mak
destname=libstemmer_c; \
dest=dist/$${destname}; \
rm -rf $${dest} && \
rm -f $${dest}.tgz && \
mkdir -p $${dest} && \
cp -a doc/libstemmer_c_README $${dest}/README && \
mkdir -p $${dest}/examples && \
cp -a examples/stemwords.c $${dest}/examples && \
mkdir -p $${dest}/$(c_src_dir) && \
cp -a $(C_LIB_SOURCES) $(C_LIB_HEADERS) $${dest}/$(c_src_dir) && \
mkdir -p $${dest}/runtime && \
cp -a $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) $${dest}/runtime && \
mkdir -p $${dest}/libstemmer && \
cp -a $(LIBSTEMMER_SOURCES) $(LIBSTEMMER_UTF8_SOURCES) $(LIBSTEMMER_HEADERS) $(LIBSTEMMER_EXTRA) $${dest}/libstemmer && \
mkdir -p $${dest}/include && \
mv $${dest}/libstemmer/libstemmer.h $${dest}/include && \
(cd $${dest} && \
echo "README" >> MANIFEST && \
ls $(c_src_dir)/*.c $(c_src_dir)/*.h >> MANIFEST && \
ls runtime/*.c runtime/*.h >> MANIFEST && \
ls libstemmer/*.c libstemmer/*.h >> MANIFEST && \
ls include/*.h >> MANIFEST) && \
cp -a libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak $${dest}/ && \
echo 'include mkinc.mak' >> $${dest}/Makefile && \
echo 'CFLAGS=-Iinclude' >> $${dest}/Makefile && \
echo 'all: libstemmer.o stemwords' >> $${dest}/Makefile && \
echo 'libstemmer.o: $$(snowball_sources:.c=.o)' >> $${dest}/Makefile && \
echo ' $$(AR) -cru $$@ $$^' >> $${dest}/Makefile && \
echo 'stemwords: examples/stemwords.o libstemmer.o' >> $${dest}/Makefile && \
echo ' $$(CC) -o $$@ $$^' >> $${dest}/Makefile && \
echo 'clean:' >> $${dest}/Makefile && \
echo ' rm -f stemwords *.o $(c_src_dir)/*.o runtime/*.o libstemmer/*.o' >> $${dest}/Makefile && \
(cd dist && tar zcf $${destname}.tgz $${destname}) && \
rm -rf $${dest}

# Make a distribution of all the sources required to compile the Java library.
dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
$(LIBSTEMMER_EXTRA) \
$(JAVA_SOURCES)
destname=libstemmer_java; \
dest=dist/$${destname}; \
rm -rf $${dest} && \
rm -f $${dest}.tgz && \
mkdir -p $${dest} && \
cp -a doc/libstemmer_java_README $${dest}/README && \
mkdir -p $${dest}/$(java_src_dir) && \
cp -a $(JAVA_SOURCES) $${dest}/$(java_src_dir) && \
mkdir -p $${dest}/$(java_src_main_dir) && \
cp -a $(JAVARUNTIME_SOURCES) $${dest}/$(java_src_main_dir) && \
(cd $${dest} && \
echo "README" >> MANIFEST && \
ls $(java_src_dir)/*.java >> MANIFEST && \
ls $(java_src_main_dir)/*.java >> MANIFEST) && \
(cd dist && tar zcf $${destname}.tgz $${destname}) && \
rm -rf $${dest}

check: check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r

check_utf8: $(libstemmer_algorithms:%=check_utf8_%)

check_iso_8859_1: $(ISO_8859_1_algorithms:%=check_iso_8859_1_%)

check_iso_8859_2: $(ISO_8859_2_algorithms:%=check_iso_8859_2_%)

check_koi8r: $(KOI8_R_algorithms:%=check_koi8r_%)

# Where the data files are located - assumed their repo is checked out as
# a sibling to this one.
STEMMING_DATA = ../snowball-data

check_utf8_%: $(STEMMING_DATA)/% stemwords
@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with UTF-8"
@./stemwords -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt
@diff -u $</output.txt tmp.txt
@if [ -e $</diffs.txt ] ; \
then \
./stemwords -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt -p2 && \
diff -u $</diffs.txt tmp.txt; \
fi
@rm tmp.txt

check_iso_8859_1_%: $(STEMMING_DATA)/% stemwords
@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_1"
@python -c 'print(open("$</voc.txt").read().decode("utf8").encode("iso8859-1"))' | \
./stemwords -c ISO_8859_1 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
@python -c 'print(open("$</output.txt").read().decode("utf8").encode("iso8859-1"))' | \
diff -u - tmp.txt
@rm tmp.txt

check_iso_8859_2_%: $(STEMMING_DATA)/% stemwords
@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_2"
@python -c 'print(open("$</voc.txt").read().decode("utf8").encode("iso8859-2"))' | \
./stemwords -c ISO_8859_2 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
@python -c 'print(open("$</output.txt").read().decode("utf8").encode("iso8859-2"))' | \
diff -u - tmp.txt
@rm tmp.txt

check_koi8r_%: $(STEMMING_DATA)/% stemwords
@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with KOI8R"
@python -c 'print(open("$</voc.txt").read().decode("utf8").encode("koi8_r"))' | \
./stemwords -c KOI8_R -l `echo $<|sed 's!.*/!!'` -o tmp.txt
@python -c 'print(open("$</output.txt").read().decode("utf8").encode("koi8_r"))' | \
diff -u - tmp.txt
@rm tmp.txt

+ 5
- 0
contrib/snowball/README View File

@@ -0,0 +1,5 @@
This contains the source code for the snowball compiler and the stemming
algorithms on the website.

See http://snowball.tartarus.org/ for more details.


+ 91
- 0
contrib/snowball/algorithms/danish/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,91 @@
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
undouble
)

externals ( stem )

strings ( ch )

integers ( p1 x )

groupings ( v s_ending )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef ae hex 'E6'
stringdef ao hex 'E5'
stringdef o/ hex 'F8'

define v 'aeiouy{ae}{ao}{o/}'

define s_ending 'abcdfghjklmnoprtvyz{ao}'

// Compute the mark p1; the suffix routines below use it (via
// 'setlimit tomark p1') to bound how far back a suffix may be matched.
define mark_regions as (

// Default: p1 stays at the end of the word if the scan below fails.
$p1 = limit

// x = position after the first 3 characters; 'test' restores the cursor.
test ( hop 3 setmark x )
// p1 = position just after the first non-vowel that follows a vowel.
goto v gopast non-v setmark p1
// Move p1 forward to x if it would otherwise fall before it.
try ( $p1 < x $p1 = x )
)

backwardmode (

define main_suffix as (
setlimit tomark p1 for ([substring])
among(

'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
'erets' 'et' 'eret'
(delete)
's'
(s_ending delete)
)
)

define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'gd' // significant in the call from other_suffix
'dt' 'gt' 'kt'
)
)
next] delete
)

define other_suffix as (
do ( ['st'] 'ig' delete )
setlimit tomark p1 for ([substring])
among(
'ig' 'lig' 'elig' 'els'
(delete do consonant_pair)
'l{o/}st'
(<-'l{o/}s')
)
)
define undouble as (
setlimit tomark p1 for ([non-v] ->ch)
ch
delete
)
)

define stem as (

do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
do undouble
)
)

+ 91
- 0
contrib/snowball/algorithms/danish/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,91 @@
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
undouble
)

externals ( stem )

strings ( ch )

integers ( p1 x )

groupings ( v s_ending )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef ae hex '91'
stringdef ao hex '86'
stringdef o/ hex '9B'

define v 'aeiouy{ae}{ao}{o/}'

define s_ending 'abcdfghjklmnoprtvyz{ao}'

define mark_regions as (

$p1 = limit

test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)

backwardmode (

define main_suffix as (
setlimit tomark p1 for ([substring])
among(

'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
'erets' 'et' 'eret'
(delete)
's'
(s_ending delete)
)
)

define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'gd' // significant in the call from other_suffix
'dt' 'gt' 'kt'
)
)
next] delete
)

define other_suffix as (
do ( ['st'] 'ig' delete )
setlimit tomark p1 for ([substring])
among(
'ig' 'lig' 'elig' 'els'
(delete do consonant_pair)
'l{o/}st'
(<-'l{o/}s')
)
)
define undouble as (
setlimit tomark p1 for ([non-v] ->ch)
ch
delete
)
)

define stem as (

do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
do undouble
)
)

+ 164
- 0
contrib/snowball/algorithms/dutch/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,164 @@
routines (
prelude postlude
e_ending
en_ending
mark_regions
R1 R2
undouble
standard_suffix
)

externals ( stem )

booleans ( e_found )

integers ( p1 p2 )

groupings ( v v_I v_j )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a" hex 'E4'
stringdef e" hex 'EB'
stringdef i" hex 'EF'
stringdef o" hex 'F6'
stringdef u" hex 'FC'

stringdef a' hex 'E1'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex 'F3'
stringdef u' hex 'FA'

stringdef e` hex 'E8'

define v 'aeiouy{e`}'
define v_I v + 'I'
define v_j v + 'j'

define prelude as (
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
try(['y'] <- 'Y')
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)

// Compute the marks p1 and p2 that the R1 and R2 tests compare the cursor
// against.
define mark_regions as (

// Default: both marks sit at the end of the word if the scans below fail.
$p1 = limit
$p2 = limit

// p1 = position just after the first non-vowel that follows a vowel.
gopast v gopast non-v setmark p1
try($p1 < 3 $p1 = 3) // at least 3
// p2 = result of the same vowel/non-vowel scan, continued from the cursor
// position reached while locating p1.
gopast v gopast non-v setmark p2

)

define postlude as repeat (

[substring] among(
'Y' (<- 'y')
'I' (<- 'i')
'' (next)
) //or next

)

backwardmode (

define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

define undouble as (
test among('kk' 'dd' 'tt') [next] delete
)

define e_ending as (
unset e_found
['e'] R1 test non-v delete
set e_found
undouble
)

define en_ending as (
R1 non-v and not 'gem' delete
undouble
)

define standard_suffix as (
do (
[substring] among(
'heden'
( R1 <- 'heid'
)
'en' 'ene'
( en_ending
)
's' 'se'
( R1 non-v_j delete
)
)
)
do e_ending

do ( ['heid'] R2 not 'c' delete
['en'] en_ending
)

do (
[substring] among(
'end' 'ing'
( R2 delete
(['ig'] R2 not 'e' delete) or undouble
)
'ig'
( R2 not 'e' delete
)
'lijk'
( R2 delete e_ending
)
'baar'
( R2 delete
)
'bar'
( R2 e_found delete
)
)
)
do (
non-v_I
test (
among ('aa' 'ee' 'oo' 'uu')
non-v
)
[next] delete
)
)
)

define stem as (

do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)

+ 164
- 0
contrib/snowball/algorithms/dutch/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,164 @@
routines (
prelude postlude
e_ending
en_ending
mark_regions
R1 R2
undouble
standard_suffix
)

externals ( stem )

booleans ( e_found )

integers ( p1 p2 )

groupings ( v v_I v_j )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef a" hex '84'
stringdef e" hex '89'
stringdef i" hex '8B'
stringdef o" hex '94'
stringdef u" hex '81'

stringdef a' hex 'A0'
stringdef e' hex '82'
stringdef i' hex 'A1'
stringdef o' hex 'A2'
stringdef u' hex 'A3'

stringdef e` hex '8A'

define v 'aeiouy{e`}'
define v_I v + 'I'
define v_j v + 'j'

define prelude as (
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
try(['y'] <- 'Y')
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)

define mark_regions as (

$p1 = limit
$p2 = limit

gopast v gopast non-v setmark p1
try($p1 < 3 $p1 = 3) // at least 3
gopast v gopast non-v setmark p2

)

define postlude as repeat (

[substring] among(
'Y' (<- 'y')
'I' (<- 'i')
'' (next)
) //or next

)

backwardmode (

define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

define undouble as (
test among('kk' 'dd' 'tt') [next] delete
)

define e_ending as (
unset e_found
['e'] R1 test non-v delete
set e_found
undouble
)

define en_ending as (
R1 non-v and not 'gem' delete
undouble
)

define standard_suffix as (
do (
[substring] among(
'heden'
( R1 <- 'heid'
)
'en' 'ene'
( en_ending
)
's' 'se'
( R1 non-v_j delete
)
)
)
do e_ending

do ( ['heid'] R2 not 'c' delete
['en'] en_ending
)

do (
[substring] among(
'end' 'ing'
( R2 delete
(['ig'] R2 not 'e' delete) or undouble
)
'ig'
( R2 not 'e' delete
)
'lijk'
( R2 delete e_ending
)
'baar'
( R2 delete
)
'bar'
( R2 e_found delete
)
)
)
do (
non-v_I
test (
among ('aa' 'ee' 'oo' 'uu')
non-v
)
[next] delete
)
)
)

define stem as (

do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)

+ 229
- 0
contrib/snowball/algorithms/english/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,229 @@
integers ( p1 p2 )
booleans ( Y_found )

routines (
prelude postlude
mark_regions
shortv
R1 R2
Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
exception1
exception2
)

externals ( stem )

groupings ( v v_WXY valid_LI )

stringescapes {}

define v 'aeiouy'
define v_WXY v + 'wxY'

define valid_LI 'cdeghkmnrt'

define prelude as (
unset Y_found
do ( ['{'}'] delete)
do ( ['y'] <-'Y' set Y_found)
do repeat(goto (v ['y']) <-'Y' set Y_found)
)

define mark_regions as (
$p1 = limit
$p2 = limit
do(
among (
'gener'
'commun' // added May 2005
'arsen' // added Nov 2006 (arsenic/arsenal)
// ... extensions possible here ...
) or (gopast v gopast non-v)
setmark p1
gopast v gopast non-v setmark p2
)
)

backwardmode (

define shortv as (
( non-v_WXY v non-v )
or
( non-v v atlimit )
)

define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

define Step_1a as (
try (
[substring] among (
'{'}' '{'}s' '{'}s{'}'
(delete)
)
)
[substring] among (
'sses' (<-'ss')
'ied' 'ies'
((hop 2 <-'i') or <-'ie')
's' (next gopast v delete)
'us' 'ss'
)
)

define Step_1b as (
[substring] among (
'eed' 'eedly'
(R1 <-'ee')
'ed' 'edly' 'ing' 'ingly'
(
test gopast v delete
test substring among(
'at' 'bl' 'iz'
(<+ 'e')
'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
// ignoring double c, h, j, k, q, v, w, and x
([next] delete)
'' (atmark p1 test shortv <+ 'e')
)
)
)
)

define Step_1c as (
['y' or 'Y']
non-v not atlimit
<-'i'
)

define Step_2 as (
[substring] R1 among (
'tional' (<-'tion')
'enci' (<-'ence')
'anci' (<-'ance')
'abli' (<-'able')
'entli' (<-'ent')
'izer' 'ization'
(<-'ize')
'ational' 'ation' 'ator'
(<-'ate')
'alism' 'aliti' 'alli'
(<-'al')
'fulness' (<-'ful')
'ousli' 'ousness'
(<-'ous')
'iveness' 'iviti'
(<-'ive')
'biliti' 'bli'
(<-'ble')
'ogi' ('l' <-'og')
'fulli' (<-'ful')
'lessli' (<-'less')
'li' (valid_LI delete)
)
)

define Step_3 as (
[substring] R1 among (
'tional' (<- 'tion')
'ational' (<- 'ate')
'alize' (<-'al')
'icate' 'iciti' 'ical'
(<-'ic')
'ful' 'ness'
(delete)
'ative'
(R2 delete) // 'R2' added Dec 2001
)
)

define Step_4 as (
[substring] R2 among (
'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
'ment' 'ent' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
(delete)
'ion' ('s' or 't' delete)
)
)

define Step_5 as (
[substring] among (
'e' (R2 or (R1 not shortv) delete)
'l' (R2 'l' delete)
)
)

// Succeeds, without changing anything, when the whole remaining word
// (the match must reach the start of the word, atlimit) is one of the listed
// forms. stem uses this to skip the later steps for these words.
define exception2 as (

[substring] atlimit among(
'inning' 'outing' 'canning' 'herring' 'earring'
'proceed' 'exceed' 'succeed'

// ... extensions possible here ...

)
)
)

// Forward-mode check of the complete word (the match must reach the end of
// the word, atlimit). Listed words with an action are replaced by the given
// stem; the invariant forms match with no action. Succeeds on any match,
// which makes stem skip all further processing.
define exception1 as (

[substring] atlimit among(

/* special changes: */

'skis' (<-'ski')
'skies' (<-'sky')
'dying' (<-'die')
'lying' (<-'lie')
'tying' (<-'tie')

/* special -LY cases */

'idly' (<-'idl')
'gently' (<-'gentl')
'ugly' (<-'ugli')
'early' (<-'earli')
'only' (<-'onli')
'singly' (<-'singl')

// ... extensions possible here ...

/* invariant forms: */

'sky'
'news'
'howe'

'atlas' 'cosmos' 'bias' 'andes' // not plural forms

// ... extensions possible here ...
)
)

// When the Y_found flag is set, turns every 'Y' in the word back into 'y'.
define postlude as (Y_found repeat(goto (['Y']) <-'y'))

// Entry point. Nothing more is done when exception1 matches the word or when
// the word has fewer than three characters (hop 3 fails). Otherwise runs
// prelude and mark_regions, then the suffix steps in backward mode, then
// postlude. After Step_1a, a match of exception2 skips Steps 1b to 5.
define stem as (

exception1 or
not hop 3 or (
do prelude
do mark_regions
backwards (

do Step_1a

exception2 or (

do Step_1b
do Step_1c

do Step_2
do Step_3
do Step_4

do Step_5
)
)
do postlude
)
)

+ 196
- 0
contrib/snowball/algorithms/finnish/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,196 @@

/* Finnish stemmer.

Numbers in square brackets refer to the sections in
Fred Karlsson, Finnish: An Essential Grammar. Routledge, 1999
ISBN 0-415-20705-3

*/

routines (
mark_regions
R2
particle_etc possessive
LONG VI
case_ending
i_plural
t_plural
other_endings
tidy
)

externals ( stem )

integers ( p1 p2 )
strings ( x )
booleans ( ending_removed )
groupings ( AEI V1 V2 particle_end )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a" hex 'E4'
stringdef o" hex 'F6'

define AEI 'a{a"}ei'
define V1 'aeiouy{a"}{o"}'
define V2 'aeiou{a"}{o"}'
define particle_end V1 + 'nt'

// Sets p1 and p2, both defaulting to limit (the end of the word). p1 is
// placed just after the first non-V1 character that follows a V1 vowel, and
// p2 just after the next such character beyond p1. The routine fails if a
// search fails; marks assigned before that point keep their values.
define mark_regions as (

$p1 = limit
$p2 = limit

goto V1 gopast non-V1 setmark p1
goto V1 gopast non-V1 setmark p2
)

backwardmode (

// Region test: succeeds when the cursor is at or beyond mark p2.
define R2 as $p2 <= cursor

// Deletes a particle or adverb ending. The ending is searched for only in the
// part of the word after p1 (setlimit tomark p1). The particles require a
// preceding character from the particle_end grouping; 'sti' must lie in R2.
define particle_etc as (
setlimit tomark p1 for ([substring])
among(
'kin'
'kaan' 'k{a"}{a"}n'
'ko' 'k{o"}'
'han' 'h{a"}n'
'pa' 'p{a"}' // Particles [91]
(particle_end)
'sti' // Adverb [87]
(R2)
)
delete
)
// Removes a possessive ending found in the part of the word after p1. Each
// group of strings shares the action that follows it.
define possessive as ( // [36]
setlimit tomark p1 for ([substring])
among(
'si'
(not 'k' delete) // take 'ksi' as the Comitative case
'ni'
(delete ['kse'] <- 'ksi') // kseni = ksi + ni
'nsa' 'ns{a"}'
'mme'
'nne'
(delete)
/* Now for Vn possessives after case endings: [36] */
// these are deleted only when preceded by one of the listed case endings
'an'
(among('ta' 'ssa' 'sta' 'lla' 'lta' 'na') delete)
'{a"}n'
(among('t{a"}' 'ss{a"}' 'st{a"}'
'll{a"}' 'lt{a"}' 'n{a"}') delete)
'en'
(among('lle' 'ine') delete)
)
)

// LONG: backward-mode test for one of the listed doubled vowels immediately
// before the cursor.
define LONG as
among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}')

// VI: backward-mode test for 'i' immediately before the cursor, itself
// preceded by a V2 vowel.
define VI as ('i' V2)

// Deletes a case ending found in the part of the word after p1 and sets the
// ending_removed flag. Several endings carry a condition on the text that
// precedes them, written after the string(s) it applies to.
define case_ending as (
setlimit tomark p1 for ([substring])
among(
'han' ('a') //-.
'hen' ('e') // |
'hin' ('i') // |
'hon' ('o') // |
'h{a"}n' ('{a"}') // Illative [43]
'h{o"}n' ('{o"}') // |
'siin' VI // |
'seen' LONG //-'

'den' VI
'tten' VI // Genitive plurals [34]
()
'n' // Genitive or Illative
// when a long vowel or 'ie' precedes, the slice to delete is extended
// by one further character
( try ( LONG // Illative
or 'ie' // Genitive
and next ]
)
/* otherwise Genitive */
)

'a' '{a"}' //-.
(V1 non-V1) // |
'tta' 'tt{a"}' // Partitive [32]
('e') // |
'ta' 't{a"}' //-'

'ssa' 'ss{a"}' // Inessive [41]
'sta' 'st{a"}' // Elative [42]

'lla' 'll{a"}' // Adessive [44]
'lta' 'lt{a"}' // Ablative [51]
'lle' // Allative [46]
'na' 'n{a"}' // Essive [49]
'ksi' // Translative [50]
'ine' // Comitative [51]

/* Abessive and Instructive are too rare for
inclusion [51] */

)
delete
set ending_removed
)
// Deletes a comparative, superlative or agent ending found in the part of
// the word after p2. The comparative forms are not removed when they are
// preceded by 'po'.
define other_endings as (
setlimit tomark p2 for ([substring])
among(
'mpi' 'mpa' 'mp{a"}'
'mmi' 'mma' 'mm{a"}' // Comparative forms [85]
(not 'po') //-improves things
'impi' 'impa' 'imp{a"}'
'immi' 'imma' 'imm{a"}' // Superlative forms [86]
'eja' 'ej{a"}' // indicates agent [93.1B]
)
delete
)
// Deletes a final 'i' or 'j' found in the part of the word after p1.
define i_plural as ( // [26]
setlimit tomark p1 for ([substring])
among(
'i' 'j'
)
delete
)
// Deletes a final 't' (within the part after p1) when it is preceded by a V1
// vowel. If that succeeded, then within the part after p2 it also deletes
// 'mma' (unless preceded by 'po') or 'imma'.
define t_plural as ( // [26]
setlimit tomark p1 for (
['t'] test V1
delete
)
setlimit tomark p2 for ([substring])
among(
'mma' (not 'po') //-mmat endings
'imma' //-immat endings
)
delete
)
// Final clean-up. Within the part after p1, each 'do' step is attempted
// independently. Afterwards the last line searches backwards for a non-V1
// character, copies it into x, and deletes it when the same character
// precedes it.
define tidy as (
setlimit tomark p1 for (
do ( LONG and ([next] delete ) ) // undouble vowel
do ( [AEI] non-V1 delete ) // remove trailing a, a", e, i
// delete a final 'j' preceded by 'o' or 'u'
do ( ['j'] 'o' or 'u' delete )
// delete a final 'o' preceded by 'j'
do ( ['o'] 'j' delete )
)
goto non-V1 [next] -> x x delete // undouble consonant
)
)

// Entry point. Marks the regions, clears ending_removed, then runs the
// suffix routines in backward mode. i_plural is attempted only when
// case_ending removed something (ending_removed is set); otherwise t_plural
// is attempted.
define stem as (

do mark_regions
unset ending_removed
backwards (
do particle_etc
do possessive
do case_ending
do other_endings
(ending_removed do i_plural) or do t_plural
do tidy
)
)


+ 248
- 0
contrib/snowball/algorithms/french/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,248 @@
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
i_verb_suffix
verb_suffix
residual_suffix
un_double
un_accent
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v keep_with_s )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a^ hex 'E2' // a-circumflex
stringdef a` hex 'E0' // a-grave
stringdef c, hex 'E7' // c-cedilla

stringdef e" hex 'EB' // e-diaeresis (rare)
stringdef e' hex 'E9' // e-acute
stringdef e^ hex 'EA' // e-circumflex
stringdef e` hex 'E8' // e-grave
stringdef i" hex 'EF' // i-diaeresis
stringdef i^ hex 'EE' // i-circumflex
stringdef o^ hex 'F4' // o-circumflex
stringdef u^ hex 'FB' // u-circumflex
stringdef u` hex 'F9' // u-grave

define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'

// Repeatedly scans forward and upper-cases letters that must not be treated
// as vowels later on: 'u' or 'i' between two vowels (U, I), 'y' after a vowel
// or before a vowel (Y), and 'u' after 'q' (U).
define prelude as repeat goto (

( v [ ('u' ] v <- 'U') or
('i' ] v <- 'I') or
('y' ] <- 'Y')
)
or
( ['y'] v <- 'Y' )
or
( 'q' ['u'] <- 'U' )
)

// Sets pV, p1 and p2, all defaulting to limit (the end of the word).
// pV: after the third letter when the word begins with two vowels; otherwise
// after one of the listed exception prefixes when the word starts with one;
// otherwise just past the first vowel found after the first letter.
// p1: just after the first non-vowel following a vowel; p2: the same again,
// searching onwards from p1. Each 'do' leaves unset marks at their defaults.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

do (
( v v next )
or
among ( // this exception list begun Nov 2006
'par' // paris, parie, pari
'col' // colis
'tap' // tapis
// extensions possible here
)
or
( next gopast v )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// Walks forward through the word turning the marker letters I, U and Y back
// into lower case; any other character is skipped via the empty-string entry.
define postlude as repeat (

[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'Y' (<- 'y')
'' (next)
)
)

backwardmode (

// Region tests: succeed when the cursor is at or beyond mark pV (RV),
// p1 (R1) or p2 (R2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Standard suffix removal. Finds the longest listed suffix and runs the
// action shared by its group; the action usually requires the suffix to lie
// in R2, R1 or RV before deleting or replacing it. The routine fails when no
// suffix matches, when the region condition fails, or when the action ends
// in fail(...).
define standard_suffix as (
[substring] among(

'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
'ances' 'iqUes' 'ismes' 'ables' 'istes'
( R2 delete )
'atrice' 'ateur' 'ation'
'atrices' 'ateurs' 'ations'
// after deletion, a preceding 'ic' is deleted when in R2, otherwise
// replaced by 'iqU'
( R2 delete
try ( ['ic'] (R2 delete) or <-'iqU' )
)
'logie'
'logies'
( R2 <- 'log' )
'usion' 'ution'
'usions' 'utions'
( R2 <- 'u' )
'ence'
'ences'
( R2 <- 'ent' )
'ement'
'ements'
(
RV delete
// then optionally handle the suffix that is now exposed
try (
[substring] among(
'iv' (R2 delete ['at'] R2 delete)
'eus' ((R2 delete) or (R1<-'eux'))
'abl' 'iqU'
(R2 delete)
'i{e`}r' 'I{e`}r' //)
(RV <-'i') //)--new 2 Sept 02
)
)
)
'it{e'}'
'it{e'}s'
(
R2 delete
try (
[substring] among(
'abil' ((R2 delete) or <-'abl')
'ic' ((R2 delete) or <-'iqU')
'iv' (R2 delete)
)
)
)
'if' 'ive'
'ifs' 'ives'
(
R2 delete
try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
)
'eaux' (<- 'eau')
'aux' (R1 <- 'al')
'euse'
'euses'((R2 delete) or (R1<-'eux'))

'issement'
'issements'(R1 non-v delete) // verbal

// fail(...) below forces entry to verb_suffix. -ment typically
// follows the p.p., e.g. 'confus{e'}ment'.

'amment' (RV fail(<- 'ant'))
'emment' (RV fail(<- 'ent'))
'ment'
'ments' (test(v RV) fail(delete))
// v is e,i,u,{e'},I or U
)
)

// Within the part of the word after pV, deletes the longest listed verb
// suffix beginning with i, provided it is preceded by a non-vowel.
define i_verb_suffix as setlimit tomark pV for (
[substring] among (
'{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
'issez' 'issiez' 'issions' 'issons' 'it'
(non-v delete)
)
)

// Within the part of the word after pV, handles the longest listed verb
// suffix: 'ions' is deleted only when in R2; the second group is simply
// deleted; the third group is deleted together with a preceding 'e' if there
// is one.
define verb_suffix as setlimit tomark pV for (
[substring] among (
'ions'
(R2 delete)

'{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
'erons' 'eront' 'ez' 'iez'

// 'ons' //-best omitted

(delete)

'{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
'assions'
(delete
try(['e'] delete)
)
)
)

define keep_with_s 'aiou{e`}s'

// Residual suffix removal. First deletes a final 's' unless the character
// before it is in keep_with_s. Then, within the part of the word after pV:
// 'ion' is deleted when in R2 and preceded by 's' or 't'; the 'ier' forms
// become 'i'; 'e' is deleted; e-diaeresis is deleted when preceded by 'gu'.
define residual_suffix as (
try(['s'] test non-keep_with_s delete)
setlimit tomark pV for (
[substring] among(
'ion' (R2 's' or 't' delete)
'ier' 'i{e`}re'
'Ier' 'I{e`}re' (<-'i')
'e' (delete)
'{e"}' ('gu' delete)
)
)
)

// If the word ends in one of the listed letter groups, deletes its last
// letter ('test' restores the cursor before the deletion).
define un_double as (
test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
)

// If the word ends in one or more non-vowels preceded by e-acute or e-grave,
// replaces that accented letter with 'e'.
define un_accent as (
atleast 1 non-v
[ '{e'}' or '{e`}' ] <-'e'
)
)

// Entry point. After prelude and mark_regions, works in backward mode: tries
// standard_suffix, then i_verb_suffix, then verb_suffix; when one succeeds,
// a final 'Y' becomes 'i' or a final c-cedilla becomes 'c'. When none
// succeeds, residual_suffix is tried instead. Then un_double and un_accent
// run, and finally postlude.
define stem as (

do prelude
do mark_regions
backwards (

do (
(
( standard_suffix or
i_verb_suffix or
verb_suffix
)
and
try( [ ('Y' ] <- 'i' ) or
('{c,}'] <- 'c' )
)
) or
residual_suffix
)

// try(['ent'] RV delete) // is best omitted

do un_double
do un_accent
)
do postlude
)


+ 239
- 0
contrib/snowball/algorithms/french/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,239 @@
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
i_verb_suffix
verb_suffix
residual_suffix
un_double
un_accent
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v keep_with_s )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef a^ hex '83' // a-circumflex
stringdef a` hex '85' // a-grave
stringdef c, hex '87' // c-cedilla

stringdef e" hex '89' // e-diaeresis (rare)
stringdef e' hex '82' // e-acute
stringdef e^ hex '88' // e-circumflex
stringdef e` hex '8A' // e-grave
stringdef i" hex '8B' // i-diaeresis
stringdef i^ hex '8C' // i-circumflex
stringdef o^ hex '93' // o-circumflex
stringdef u^ hex '96' // u-circumflex
stringdef u` hex '97' // u-grave

define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'

// Repeatedly scans forward and upper-cases letters that must not be treated
// as vowels later on: 'u' or 'i' between two vowels (U, I), 'y' after a vowel
// or before a vowel (Y), and 'u' after 'q' (U).
define prelude as repeat goto (

( v [ ('u' ] v <- 'U') or
('i' ] v <- 'I') or
('y' ] <- 'Y')
)
or
( ['y'] v <- 'Y' )
or
( 'q' ['u'] <- 'U' )
)

// Sets pV, p1 and p2, all defaulting to limit (the end of the word).
// pV: after the third letter when the word begins with two vowels; otherwise
// after one of the listed exception prefixes when the word starts with one;
// otherwise just past the first vowel found after the first letter.
// p1: just after the first non-vowel following a vowel; p2: the same again,
// searching onwards from p1. Each 'do' leaves unset marks at their defaults.
//
// The exception list matches the one in the ISO Latin I variant of this
// stemmer, so both encodings stem the same words identically. Its strings
// are plain ASCII and therefore independent of the character encoding.
define mark_regions as (

    $pV = limit
    $p1 = limit
    $p2 = limit // defaults

    do (
        ( v v next )
        or
        among ( // exception list, begun Nov 2006 in the ISO Latin I variant
            'par' // paris, parie, pari
            'col' // colis
            'tap' // tapis
            // extensions possible here
        )
        or
        ( next gopast v )
        setmark pV
    )
    do (
        gopast v gopast non-v setmark p1
        gopast v gopast non-v setmark p2
    )
)

// Walks forward through the word turning the marker letters I, U and Y back
// into lower case; any other character is skipped via the empty-string entry.
define postlude as repeat (

[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'Y' (<- 'y')
'' (next)
)
)

backwardmode (

// Region tests: succeed when the cursor is at or beyond mark pV (RV),
// p1 (R1) or p2 (R2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Standard suffix removal. Finds the longest listed suffix and runs the
// action shared by its group; the action usually requires the suffix to lie
// in R2, R1 or RV before deleting or replacing it. The routine fails when no
// suffix matches, when the region condition fails, or when the action ends
// in fail(...).
define standard_suffix as (
[substring] among(

'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
'ances' 'iqUes' 'ismes' 'ables' 'istes'
( R2 delete )
'atrice' 'ateur' 'ation'
'atrices' 'ateurs' 'ations'
// after deletion, a preceding 'ic' is deleted when in R2, otherwise
// replaced by 'iqU'
( R2 delete
try ( ['ic'] (R2 delete) or <-'iqU' )
)
'logie'
'logies'
( R2 <- 'log' )
'usion' 'ution'
'usions' 'utions'
( R2 <- 'u' )
'ence'
'ences'
( R2 <- 'ent' )
'ement'
'ements'
(
RV delete
// then optionally handle the suffix that is now exposed
try (
[substring] among(
'iv' (R2 delete ['at'] R2 delete)
'eus' ((R2 delete) or (R1<-'eux'))
'abl' 'iqU'
(R2 delete)
'i{e`}r' 'I{e`}r' //)
(RV <-'i') //)--new 2 Sept 02
)
)
)
'it{e'}'
'it{e'}s'
(
R2 delete
try (
[substring] among(
'abil' ((R2 delete) or <-'abl')
'ic' ((R2 delete) or <-'iqU')
'iv' (R2 delete)
)
)
)
'if' 'ive'
'ifs' 'ives'
(
R2 delete
try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
)
'eaux' (<- 'eau')
'aux' (R1 <- 'al')
'euse'
'euses'((R2 delete) or (R1<-'eux'))

'issement'
'issements'(R1 non-v delete) // verbal

// fail(...) below forces entry to verb_suffix. -ment typically
// follows the p.p., e.g. 'confus{e'}ment'.

'amment' (RV fail(<- 'ant'))
'emment' (RV fail(<- 'ent'))
'ment'
'ments' (test(v RV) fail(delete))
// v is e,i,u,{e'},I or U
)
)

// Within the part of the word after pV, deletes the longest listed verb
// suffix beginning with i, provided it is preceded by a non-vowel.
define i_verb_suffix as setlimit tomark pV for (
[substring] among (
'{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
'issez' 'issiez' 'issions' 'issons' 'it'
(non-v delete)
)
)

// Within the part of the word after pV, handles the longest listed verb
// suffix: 'ions' is deleted only when in R2; the second group is simply
// deleted; the third group is deleted together with a preceding 'e' if there
// is one.
define verb_suffix as setlimit tomark pV for (
[substring] among (
'ions'
(R2 delete)

'{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
'erons' 'eront' 'ez' 'iez'

// 'ons' //-best omitted

(delete)

'{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
'assions'
(delete
try(['e'] delete)
)
)
)

define keep_with_s 'aiou{e`}s'

// Residual suffix removal. First deletes a final 's' unless the character
// before it is in keep_with_s. Then, within the part of the word after pV:
// 'ion' is deleted when in R2 and preceded by 's' or 't'; the 'ier' forms
// become 'i'; 'e' is deleted; e-diaeresis is deleted when preceded by 'gu'.
define residual_suffix as (
try(['s'] test non-keep_with_s delete)
setlimit tomark pV for (
[substring] among(
'ion' (R2 's' or 't' delete)
'ier' 'i{e`}re'
'Ier' 'I{e`}re' (<-'i')
'e' (delete)
'{e"}' ('gu' delete)
)
)
)

// If the word ends in one of the listed letter groups, deletes its last
// letter ('test' restores the cursor before the deletion).
define un_double as (
test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
)

// If the word ends in one or more non-vowels preceded by e-acute or e-grave,
// replaces that accented letter with 'e'.
define un_accent as (
atleast 1 non-v
[ '{e'}' or '{e`}' ] <-'e'
)
)

// Entry point. After prelude and mark_regions, works in backward mode: tries
// standard_suffix, then i_verb_suffix, then verb_suffix; when one succeeds,
// a final 'Y' becomes 'i' or a final c-cedilla becomes 'c'. When none
// succeeds, residual_suffix is tried instead. Then un_double and un_accent
// run, and finally postlude.
define stem as (

do prelude
do mark_regions
backwards (

do (
(
( standard_suffix or
i_verb_suffix or
verb_suffix
)
and
try( [ ('Y' ] <- 'i' ) or
('{c,}'] <- 'c' )
)
) or
residual_suffix
)

// try(['ent'] RV delete) // is best omitted

do un_double
do un_accent
)
do postlude
)


+ 139
- 0
contrib/snowball/algorithms/german/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,139 @@

/*
Extra rule for -nisse ending added 11 Dec 2009
*/

routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)

externals ( stem )

integers ( p1 p2 x )

groupings ( v s_ending st_ending )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a" hex 'E4'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef ss hex 'DF'

define v 'aeiouy{a"}{o"}{u"}'

define s_ending 'bdfghklmnrt'
define st_ending s_ending - 'r'

// First pass: replaces every {ss} character with 'ss' ('test' puts the
// cursor back at the start afterwards). Second pass: upper-cases 'u' and 'y'
// that stand between two vowels, giving U and Y.
define prelude as (

test repeat (
(
['{ss}'] <- 'ss'
) or next
)

repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
)

// Sets p1 and p2, both defaulting to limit (the end of the word). x records
// the position three characters into the word; the routine fails if the word
// is shorter than that. p1 is placed just after the first non-vowel that
// follows a vowel and then raised to x when it lies before it; p2 is the
// corresponding position found searching onwards from the first one.
define mark_regions as (

$p1 = limit
$p2 = limit

test(hop 3 setmark x)

gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2

)

// Walks forward through the word, lower-casing the markers Y and U and
// replacing the three umlaut vowels by their plain letters; any other
// character is skipped via the empty-string entry.
define postlude as repeat (

[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)

)

backwardmode (

// Region tests: succeed when the cursor is at or beyond mark p1 (R1) or
// mark p2 (R2).
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Suffix removal in three independent steps, each wrapped in 'do' so that a
// failing step does not stop the following ones.
define standard_suffix as (
// Step 1: suffix in R1. After deleting 'e', 'en' or 'es', a final 's'
// preceded by 'nis' is deleted as well. A lone 's' is deleted only after a
// character of the s_ending grouping.
do (
[substring] R1 among(
'em' 'ern' 'er'
( delete
)
'e' 'en' 'es'
( delete
try (['s'] 'nis' delete)
)
's'
( s_ending delete
)
)
)
// Step 2: suffix in R1. 'st' is deleted only after a character of the
// st_ending grouping that has at least three more characters before it.
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
// Step 3: suffix in R2, with follow-up deletions of a further suffix that
// the first deletion exposes.
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)

// Entry point: prelude, region marking, suffix removal in backward mode,
// then postlude. Every stage is wrapped in 'do', so a failing stage does not
// stop the ones that follow.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)

+ 139
- 0
contrib/snowball/algorithms/german/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,139 @@

/*
Extra rule for -nisse ending added 11 Dec 2009
*/

routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)

externals ( stem )

integers ( p1 p2 x )

groupings ( v s_ending st_ending )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef a" hex '84'
stringdef o" hex '94'
stringdef u" hex '81'
stringdef ss hex 'E1'

define v 'aeiouy{a"}{o"}{u"}'

define s_ending 'bdfghklmnrt'
define st_ending s_ending - 'r'

// First pass: replaces every {ss} character with 'ss' ('test' puts the
// cursor back at the start afterwards). Second pass: upper-cases 'u' and 'y'
// that stand between two vowels, giving U and Y.
define prelude as (

test repeat (
(
['{ss}'] <- 'ss'
) or next
)

repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
)

// Sets p1 and p2, both defaulting to limit (the end of the word). x records
// the position three characters into the word; the routine fails if the word
// is shorter than that. p1 is placed just after the first non-vowel that
// follows a vowel and then raised to x when it lies before it; p2 is the
// corresponding position found searching onwards from the first one.
define mark_regions as (

$p1 = limit
$p2 = limit

test(hop 3 setmark x)

gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2

)

// Walks forward through the word, lower-casing the markers Y and U and
// replacing the three umlaut vowels by their plain letters; any other
// character is skipped via the empty-string entry.
define postlude as repeat (

[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)

)

backwardmode (

// Region tests: succeed when the cursor is at or beyond mark p1 (R1) or
// mark p2 (R2).
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Suffix removal in three independent steps, each wrapped in 'do' so that a
// failing step does not stop the following ones.
define standard_suffix as (
// Step 1: suffix in R1. After deleting 'e', 'en' or 'es', a final 's'
// preceded by 'nis' is deleted as well. A lone 's' is deleted only after a
// character of the s_ending grouping.
do (
[substring] R1 among(
'em' 'ern' 'er'
( delete
)
'e' 'en' 'es'
( delete
try (['s'] 'nis' delete)
)
's'
( s_ending delete
)
)
)
// Step 2: suffix in R1. 'st' is deleted only after a character of the
// st_ending grouping that has at least three more characters before it.
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
// Step 3: suffix in R2, with follow-up deletions of a further suffix that
// the first deletion exposes.
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)

// Entry point: prelude, region marking, suffix removal in backward mode,
// then postlude. Every stage is wrapped in 'do', so a failing stage does not
// stop the ones that follow.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)

+ 145
- 0
contrib/snowball/algorithms/german2/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,145 @@

/*
Extra rule for -nisse ending added 11 Dec 2009
*/

routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)

externals ( stem )

integers ( p1 p2 x )

groupings ( v s_ending st_ending )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a" hex 'E4'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef ss hex 'DF'

define v 'aeiouy{a"}{o"}{u"}'

define s_ending 'bdfghklmnrt'
define st_ending s_ending - 'r'

// First pass: upper-cases 'u' and 'y' that stand between two vowels, giving
// U and Y ('test' puts the cursor back at the start afterwards). Second
// pass: replaces {ss} with 'ss' and the digraphs 'ae', 'oe', 'ue' with the
// corresponding umlaut vowel; 'qu' is stepped over unchanged so that its
// 'u' cannot begin a 'ue' match; any other character is skipped.
define prelude as (

test repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)

repeat (
[substring] among(
'{ss}' (<- 'ss')
'ae' (<- '{a"}')
'oe' (<- '{o"}')
'ue' (<- '{u"}')
'qu' (hop 2)
'' (next)
)
)

)

// Sets p1 and p2, both defaulting to limit (the end of the word). x records
// the position three characters into the word; the routine fails if the word
// is shorter than that. p1 is placed just after the first non-vowel that
// follows a vowel and then raised to x when it lies before it; p2 is the
// corresponding position found searching onwards from the first one.
define mark_regions as (

$p1 = limit
$p2 = limit

test(hop 3 setmark x)

gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2

)

// Walks forward through the word, lower-casing the markers Y and U and
// replacing the three umlaut vowels by their plain letters; any other
// character is skipped via the empty-string entry.
define postlude as repeat (

[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)

)

backwardmode (

// Region tests: succeed when the cursor is at or beyond mark p1 (R1) or
// mark p2 (R2).
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Suffix removal in three independent steps, each wrapped in 'do' so that a
// failing step does not stop the following ones.
define standard_suffix as (
// Step 1: suffix in R1. After deleting 'e', 'en' or 'es', a final 's'
// preceded by 'nis' is deleted as well. A lone 's' is deleted only after a
// character of the s_ending grouping.
do (
[substring] R1 among(
'em' 'ern' 'er'
( delete
)
'e' 'en' 'es'
( delete
try (['s'] 'nis' delete)
)
's'
( s_ending delete
)
)
)
// Step 2: suffix in R1. 'st' is deleted only after a character of the
// st_ending grouping that has at least three more characters before it.
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
// Step 3: suffix in R2, with follow-up deletions of a further suffix that
// the first deletion exposes.
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)

// Entry point: prelude, region marking, suffix removal in backward mode,
// then postlude. Every stage is wrapped in 'do', so a failing stage does not
// stop the ones that follow.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)

+ 241
- 0
contrib/snowball/algorithms/hungarian/stem_ISO_8859_2.sbl View File

@@ -0,0 +1,241 @@
/*
Hungarian Stemmer
Removes noun inflections
*/

routines (
mark_regions
R1
v_ending
case
case_special
case_other
plural
owned
sing_owner
plur_owner
instrum
factive
undouble
double
)

externals ( stem )

integers ( p1 )
groupings ( v )

stringescapes {}

/* special characters (in ISO Latin 2) */

stringdef a' hex 'E1' //a-acute
stringdef e' hex 'E9' //e-acute
stringdef i' hex 'ED' //i-acute
stringdef o' hex 'F3' //o-acute
stringdef o" hex 'F6' //o-umlaut
stringdef oq hex 'F5' //o-double acute
stringdef u' hex 'FA' //u-acute
stringdef u" hex 'FC' //u-umlaut
stringdef uq hex 'FB' //u-double acute

define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'

// Sets p1, defaulting to limit (the end of the word). If the word begins
// with a vowel, the cursor moves to the first non-vowel and p1 is placed
// after one of the listed multi-letter consonants found there, or else after
// a single character. Otherwise, when the word begins with a non-vowel, p1
// is placed just after the first vowel.
define mark_regions as (

$p1 = limit

(v goto non-v
among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
setmark p1)
or

(non-v gopast v setmark p1)
)

backwardmode (

// Region test: succeeds when the cursor is at or beyond mark p1.
define R1 as $p1 <= cursor

// Replaces a final a-acute or e-acute lying in R1 with plain 'a' or 'e'.
define v_ending as (
[substring] R1 among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)

// Tests, without moving the cursor, that the text before the cursor ends in
// one of the listed doubled consonants.
define double as (
test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
)

// Steps back over one character and deletes the character before it.
define undouble as (
next [hop 1] delete
)

// When the word ends in 'al' or 'el' lying in R1 and preceded by a doubled
// consonant, deletes the suffix and then applies undouble.
define instrum as(
[substring] R1 among(
'al' (double)
'el' (double)
)
delete
undouble
)


// Deletes the longest listed case suffix lying in R1, then applies v_ending
// to the shortened word.
define case as (
[substring] R1 among(
'ban' 'ben'
'ba' 'be'
'ra' 're'
'nak' 'nek'
'val' 'vel'
't{o'}l' 't{oq}l'
'r{o'}l' 'r{oq}l'
'b{o'}l' 'b{oq}l'
'hoz' 'hez' 'h{o"}z'
'n{a'}l' 'n{e'}l'
'ig'
'at' 'et' 'ot' '{o"}t'
'{e'}rt'
'k{e'}pp' 'k{e'}ppen'
'kor'
'ul' '{u"}l'
'v{a'}' 'v{e'}'
'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
'k{e'}nt'
'en' 'on' 'an' '{o"}n'
'n'
't'
)
delete
v_ending
)

// Replaces a listed suffix lying in R1, each beginning with an accented
// vowel, by the corresponding unaccented vowel.
define case_special as(
[substring] R1 among(
'{e'}n' (<- 'e')
'{a'}n' (<- 'a')
'{a'}nk{e'}nt' (<- 'a')
)
)

// Handles the -stul / -st{u"}l suffixes lying in R1: the forms with an
// unaccented or no leading vowel are deleted, the forms with an accented
// leading vowel are replaced by the unaccented vowel.
define case_other as(
[substring] R1 among(
'astul' 'est{u"}l' (delete)
'stul' 'st{u"}l' (delete)
'{a'}stul' (<- 'a')
'{e'}st{u"}l' (<- 'e')
)
)

// When the word ends in a-acute or e-acute lying in R1 and preceded by a
// doubled consonant, deletes that vowel and then applies undouble.
define factive as(
[substring] R1 among(
'{a'}' (double)
'{e'}' (double)
)
delete
undouble
)

// Handles the longest listed -k suffix lying in R1: forms with an accented
// vowel are replaced by the unaccented vowel, the others are deleted.
define plural as (
[substring] R1 among(
'{a'}k' (<- 'a')
'{e'}k' (<- 'e')
'{o"}k' (delete)
'ak' (delete)
'ok' (delete)
'ek' (delete)
'k' (delete)
)
)

// Handles the longest listed suffix lying in R1. Entries that begin with an
// accented vowel are replaced by the unaccented vowel; the remaining entries
// are deleted.
define owned as (
[substring] R1 among (
'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
'{e'}k{e'}' (<- 'e')
'{a'}k{e'}' (<- 'a')
'k{e'}' (delete)
'{e'}{e'}i' (<- 'e')
'{a'}{e'}i' (<- 'a')
'{e'}i' (delete)
'{e'}{e'}' (<- 'e')
'{e'}' (delete)
)
)

// Handles the longest listed suffix lying in R1. Entries that begin with an
// accented vowel are replaced by the unaccented vowel; the remaining entries
// are deleted.
define sing_owner as (
[substring] R1 among(
'{u"}nk' 'unk' (delete)
'{a'}nk' (<- 'a')
'{e'}nk' (<- 'e')
'nk' (delete)
'{a'}juk' (<- 'a')
'{e'}j{u"}k' (<- 'e')
'juk' 'j{u"}k' (delete)
'uk' '{u"}k' (delete)
'em' 'om' 'am' (delete)
'{a'}m' (<- 'a')
'{e'}m' (<- 'e')
'm' (delete)
'od' 'ed' 'ad' '{o"}d' (delete)
'{a'}d' (<- 'a')
'{e'}d' (<- 'e')
'd' (delete)
'ja' 'je' (delete)
'a' 'e' 'o' (delete)
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)

// Handles the longest listed suffix lying in R1. Entries that begin with an
// accented vowel are replaced by the unaccented vowel; the remaining entries
// are deleted.
define plur_owner as (
[substring] R1 among(
'jaim' 'jeim' (delete)
'{a'}im' (<- 'a')
'{e'}im' (<- 'e')
'aim' 'eim' (delete)
'im' (delete)
'jaid' 'jeid' (delete)
'{a'}id' (<- 'a')
'{e'}id' (<- 'e')
'aid' 'eid' (delete)
'id' (delete)
'jai' 'jei' (delete)
'{a'}i' (<- 'a')
'{e'}i' (<- 'e')
'ai' 'ei' (delete)
'i' (delete)
'jaink' 'jeink' (delete)
'eink' 'aink' (delete)
'{a'}ink' (<- 'a')
'{e'}ink' (<- 'e')
// 'ink' has no action of its own, so it shares the (delete) that follows
// the strings on the next line
'ink'
'jaitok' 'jeitek' (delete)
'aitok' 'eitek' (delete)
'{a'}itok' (<- 'a')
'{e'}itek' (<- 'e')
'itek' (delete)
'jeik' 'jaik' (delete)
'aik' 'eik' (delete)
'{a'}ik' (<- 'a')
'{e'}ik' (<- 'e')
'ik' (delete)
)
)
)

// Entry point. Marks region R1, then applies every suffix routine in turn in
// backward mode. Each is wrapped in 'do', so a routine that fails does not
// stop the ones that follow.
define stem as (
do mark_regions
backwards (
do instrum
do case
do case_special
do case_other
do factive
do owned
do sing_owner
do plur_owner
do plural
)
)

+ 241
- 0
contrib/snowball/algorithms/hungarian/stem_Unicode.sbl View File

@@ -0,0 +1,241 @@
/*
Hungarian Stemmer
Removes noun inflections
*/

routines (
mark_regions
R1
v_ending
case
case_special
case_other
plural
owned
sing_owner
plur_owner
instrum
factive
undouble
double
)

externals ( stem )

integers ( p1 )
groupings ( v )

stringescapes {}

/* special characters (in Unicode) */

stringdef a' hex 'E1' //a-acute
stringdef e' hex 'E9' //e-acute
stringdef i' hex 'ED' //i-acute
stringdef o' hex 'F3' //o-acute
stringdef o" hex 'F6' //o-umlaut
stringdef oq hex '151' //o-double acute
stringdef u' hex 'FA' //u-acute
stringdef u" hex 'FC' //u-umlaut
stringdef uq hex '171' //u-double acute

define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'

// Sets p1, defaulting to limit (the end of the word). If the word begins
// with a vowel, the cursor moves to the first non-vowel and p1 is placed
// after one of the listed multi-letter consonants found there, or else after
// a single character. Otherwise, when the word begins with a non-vowel, p1
// is placed just after the first vowel.
define mark_regions as (

$p1 = limit

(v goto non-v
among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
setmark p1)
or

(non-v gopast v setmark p1)
)

backwardmode (

// Region test: succeeds when the cursor is at or beyond mark p1.
define R1 as $p1 <= cursor

// Replaces a final a-acute or e-acute lying in R1 with plain 'a' or 'e'.
define v_ending as (
[substring] R1 among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)

// Tests, without moving the cursor, that the text before the cursor ends in
// one of the listed doubled consonants.
define double as (
test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
)

// Steps back over one character and deletes the character before it.
define undouble as (
next [hop 1] delete
)

// When the word ends in 'al' or 'el' lying in R1 and preceded by a doubled
// consonant, deletes the suffix and then applies undouble.
define instrum as(
[substring] R1 among(
'al' (double)
'el' (double)
)
delete
undouble
)


// Deletes the longest listed case suffix lying in R1, then applies v_ending
// to the shortened word.
define case as (
[substring] R1 among(
'ban' 'ben'
'ba' 'be'
'ra' 're'
'nak' 'nek'
'val' 'vel'
't{o'}l' 't{oq}l'
'r{o'}l' 'r{oq}l'
'b{o'}l' 'b{oq}l'
'hoz' 'hez' 'h{o"}z'
'n{a'}l' 'n{e'}l'
'ig'
'at' 'et' 'ot' '{o"}t'
'{e'}rt'
'k{e'}pp' 'k{e'}ppen'
'kor'
'ul' '{u"}l'
'v{a'}' 'v{e'}'
'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
'k{e'}nt'
'en' 'on' 'an' '{o"}n'
'n'
't'
)
delete
v_ending
)

// Replaces a listed suffix lying in R1, each beginning with an accented
// vowel, by the corresponding unaccented vowel.
define case_special as(
[substring] R1 among(
'{e'}n' (<- 'e')
'{a'}n' (<- 'a')
'{a'}nk{e'}nt' (<- 'a')
)
)

// Handles the -stul / -st{u"}l suffixes lying in R1: the forms with an
// unaccented or no leading vowel are deleted, the forms with an accented
// leading vowel are replaced by the unaccented vowel.
define case_other as(
[substring] R1 among(
'astul' 'est{u"}l' (delete)
'stul' 'st{u"}l' (delete)
'{a'}stul' (<- 'a')
'{e'}st{u"}l' (<- 'e')
)
)

// When the word ends in a-acute or e-acute lying in R1 and preceded by a
// doubled consonant, deletes that vowel and then applies undouble.
define factive as(
[substring] R1 among(
'{a'}' (double)
'{e'}' (double)
)
delete
undouble
)

// Handles the longest listed -k suffix lying in R1: forms with an accented
// vowel are replaced by the unaccented vowel, the others are deleted.
define plural as (
[substring] R1 among(
'{a'}k' (<- 'a')
'{e'}k' (<- 'e')
'{o"}k' (delete)
'ak' (delete)
'ok' (delete)
'ek' (delete)
'k' (delete)
)
)

// Handles the longest listed suffix lying in R1. Entries that begin with an
// accented vowel are replaced by the unaccented vowel; the remaining entries
// are deleted.
define owned as (
[substring] R1 among (
'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
'{e'}k{e'}' (<- 'e')
'{a'}k{e'}' (<- 'a')
'k{e'}' (delete)
'{e'}{e'}i' (<- 'e')
'{a'}{e'}i' (<- 'a')
'{e'}i' (delete)
'{e'}{e'}' (<- 'e')
'{e'}' (delete)
)
)

// Handles the longest listed suffix lying in R1. Entries that begin with an
// accented vowel are replaced by the unaccented vowel; the remaining entries
// are deleted.
define sing_owner as (
[substring] R1 among(
'{u"}nk' 'unk' (delete)
'{a'}nk' (<- 'a')
'{e'}nk' (<- 'e')
'nk' (delete)
'{a'}juk' (<- 'a')
'{e'}j{u"}k' (<- 'e')
'juk' 'j{u"}k' (delete)
'uk' '{u"}k' (delete)
'em' 'om' 'am' (delete)
'{a'}m' (<- 'a')
'{e'}m' (<- 'e')
'm' (delete)
'od' 'ed' 'ad' '{o"}d' (delete)
'{a'}d' (<- 'a')
'{e'}d' (<- 'e')
'd' (delete)
'ja' 'je' (delete)
'a' 'e' 'o' (delete)
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)

// Handles the longest listed suffix lying in R1. Entries that begin with an
// accented vowel are replaced by the unaccented vowel; the remaining entries
// are deleted.
define plur_owner as (
[substring] R1 among(
'jaim' 'jeim' (delete)
'{a'}im' (<- 'a')
'{e'}im' (<- 'e')
'aim' 'eim' (delete)
'im' (delete)
'jaid' 'jeid' (delete)
'{a'}id' (<- 'a')
'{e'}id' (<- 'e')
'aid' 'eid' (delete)
'id' (delete)
'jai' 'jei' (delete)
'{a'}i' (<- 'a')
'{e'}i' (<- 'e')
'ai' 'ei' (delete)
'i' (delete)
'jaink' 'jeink' (delete)
'eink' 'aink' (delete)
'{a'}ink' (<- 'a')
'{e'}ink' (<- 'e')
// 'ink' has no action of its own, so it shares the (delete) that follows
// the strings on the next line
'ink'
'jaitok' 'jeitek' (delete)
'aitok' 'eitek' (delete)
'{a'}itok' (<- 'a')
'{e'}itek' (<- 'e')
'itek' (delete)
'jeik' 'jaik' (delete)
'aik' 'eik' (delete)
'{a'}ik' (<- 'a')
'{e'}ik' (<- 'e')
'ik' (delete)
)
)
)

// Entry point. Marks region R1, then applies every suffix routine in turn in
// backward mode. Each is wrapped in 'do', so a routine that fails does not
// stop the ones that follow.
define stem as (
do mark_regions
backwards (
do instrum
do case
do case_special
do case_other
do factive
do owned
do sing_owner
do plur_owner
do plural
)
)

+ 195
- 0
contrib/snowball/algorithms/italian/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,195 @@

routines (
prelude postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
verb_suffix
vowel_suffix
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v AEIO CG )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a' hex 'E1'
stringdef a` hex 'E0'
stringdef e' hex 'E9'
stringdef e` hex 'E8'
stringdef i' hex 'ED'
stringdef i` hex 'EC'
stringdef o' hex 'F3'
stringdef o` hex 'F2'
stringdef u' hex 'FA'
stringdef u` hex 'F9'

define v 'aeiou{a`}{e`}{i`}{o`}{u`}'

// First pass: replaces each acute-accented vowel by its grave-accented
// counterpart and 'qu' by 'qU' ('test' puts the cursor back at the start
// afterwards). Second pass: upper-cases 'u' and 'i' that stand between two
// vowels, giving U and I.
define prelude as (
test repeat (
[substring] among(
'{a'}' (<- '{a`}')
'{e'}' (<- '{e`}')
'{i'}' (<- '{i`}')
'{o'}' (<- '{o`}')
'{u'}' (<- '{u`}')
'qu' (<- 'qU')
'' (next)
)
)
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)

// Sets pV, p1 and p2, all defaulting to limit (the end of the word).
// pV, word starting with a vowel: if the second letter is a non-vowel, just
// past the next vowel; if it is a vowel, just past the next non-vowel.
// pV, word starting with a non-vowel: if the second letter is a non-vowel,
// just past the next vowel; if it is a vowel, after the third letter.
// p1: just after the first non-vowel following a vowel; p2: the same again,
// searching onwards from p1. Each 'do' leaves unset marks at their defaults.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// Walks forward through the word turning the marker letters I and U back
// into lower case; any other character is skipped via the empty-string entry.
define postlude as repeat (

[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)

)

backwardmode (

// Region tests: succeed when the cursor is at or beyond mark pV (RV),
// p1 (R1) or p2 (R2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Finds the longest listed pronoun suffix, then looks at what precedes it.
// The preceding ending must lie in RV (the leading (RV) applies to every
// entry of the second among). After 'ando'/'endo' the pronoun is deleted;
// after 'ar'/'er'/'ir' the pronoun is replaced by 'e'.
define attached_pronoun as (
[substring] among(
'ci' 'gli' 'la' 'le' 'li' 'lo'
'mi' 'ne' 'si' 'ti' 'vi'
// the compound forms are:
'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
'mela' 'mele' 'meli' 'melo' 'mene'
'tela' 'tele' 'teli' 'telo' 'tene'
'cela' 'cele' 'celi' 'celo' 'cene'
'vela' 'vele' 'veli' 'velo' 'vene'
)
among( (RV)
'ando' 'endo' (delete)
'ar' 'er' 'ir' (<- 'e')
)
)

// Standard suffix removal. Finds the longest listed suffix and runs the
// action shared by its group; each action first requires the suffix to lie
// in R2, R1 or RV. Fails when no suffix matches or the region test fails.
define standard_suffix as (
[substring] among(

'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
'atrice' 'atrici'
'ante' 'anti' // Note 1
( R2 delete )
'azione' 'azioni' 'atore' 'atori'
// a preceding 'ic' lying in R2 is deleted as well
( R2 delete
try ( ['ic'] R2 delete )
)
'logia' 'logie'
( R2 <- 'log' )
'uzione' 'uzioni' 'usione' 'usioni'
( R2 <- 'u' )
'enza' 'enze'
( R2 <- 'ente' )
'amento' 'amenti' 'imento' 'imenti'
( RV delete )
'amente' (
R1 delete
// then optionally delete a further listed suffix lying in R2; after
// 'iv', a preceding 'at' in R2 is deleted too
try (
[substring] R2 delete among(
'iv' ( ['at'] R2 delete )
'os' 'ic' 'abil'
)
)
)
'it{a`}' (
R2 delete
try (
[substring] among(
'abil' 'ic' 'iv' (R2 delete)
)
)
)
'ivo' 'ivi' 'iva' 'ive' (
R2 delete
try ( ['at'] R2 delete ['ic'] R2 delete )
)
)
)

// Within the part of the word after pV, deletes the longest listed verb
// suffix.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
'ono' 'uta' 'ute' 'uti' 'uto'

'ar' 'ir' // but 'er' is problematical
(delete)
)
)

define AEIO 'aeio{a`}{e`}{i`}{o`}'
define CG 'cg'

// Optionally deletes a final AEIO vowel lying in RV and then a preceding 'i'
// lying in RV. Then optionally deletes a final 'h' that lies in RV and is
// preceded by 'c' or 'g'.
define vowel_suffix as (
try (
[AEIO] RV delete
['i'] RV delete
)
try (
['h'] CG RV delete
)
)
)

// Entry point. After prelude and mark_regions, works in backward mode:
// attached_pronoun, then standard_suffix or (if that fails) verb_suffix,
// then vowel_suffix; finally postlude restores the marker letters.
define stem as (
do prelude
do mark_regions
backwards (
do attached_pronoun
do (standard_suffix or verb_suffix)
do vowel_suffix
)
do postlude
)

/*
Note 1: additions of 15 Jun 2005
*/


+ 195
- 0
contrib/snowball/algorithms/italian/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,195 @@

routines (
prelude postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
verb_suffix
vowel_suffix
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v AEIO CG )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef a' hex 'A0'
stringdef a` hex '85'
stringdef e' hex '82'
stringdef e` hex '8A'
stringdef i' hex 'A1'
stringdef i` hex '8D'
stringdef o' hex 'A2'
stringdef o` hex '95'
stringdef u' hex 'A3'
stringdef u` hex '97'

define v 'aeiou{a`}{e`}{i`}{o`}{u`}'

// First pass: replaces each acute-accented vowel by its grave-accented
// counterpart and 'qu' by 'qU' ('test' puts the cursor back at the start
// afterwards). Second pass: upper-cases 'u' and 'i' that stand between two
// vowels, giving U and I.
define prelude as (
test repeat (
[substring] among(
'{a'}' (<- '{a`}')
'{e'}' (<- '{e`}')
'{i'}' (<- '{i`}')
'{o'}' (<- '{o`}')
'{u'}' (<- '{u`}')
'qu' (<- 'qU')
'' (next)
)
)
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)

// Sets pV, p1 and p2, all defaulting to limit (the end of the word).
// pV, word starting with a vowel: if the second letter is a non-vowel, just
// past the next vowel; if it is a vowel, just past the next non-vowel.
// pV, word starting with a non-vowel: if the second letter is a non-vowel,
// just past the next vowel; if it is a vowel, after the third letter.
// p1: just after the first non-vowel following a vowel; p2: the same again,
// searching onwards from p1. Each 'do' leaves unset marks at their defaults.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// Walks forward through the word turning the marker letters I and U back
// into lower case; any other character is skipped via the empty-string entry.
define postlude as repeat (

[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)

)

backwardmode (

// Region tests: succeed when the cursor is at or beyond mark pV (RV),
// p1 (R1) or p2 (R2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Finds the longest listed pronoun suffix, then looks at what precedes it.
// The preceding ending must lie in RV (the leading (RV) applies to every
// entry of the second among). After 'ando'/'endo' the pronoun is deleted;
// after 'ar'/'er'/'ir' the pronoun is replaced by 'e'.
define attached_pronoun as (
[substring] among(
'ci' 'gli' 'la' 'le' 'li' 'lo'
'mi' 'ne' 'si' 'ti' 'vi'
// the compound forms are:
'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
'mela' 'mele' 'meli' 'melo' 'mene'
'tela' 'tele' 'teli' 'telo' 'tene'
'cela' 'cele' 'celi' 'celo' 'cene'
'vela' 'vele' 'veli' 'velo' 'vene'
)
among( (RV)
'ando' 'endo' (delete)
'ar' 'er' 'ir' (<- 'e')
)
)

// Standard suffix removal. Finds the longest listed suffix and runs the
// action shared by its group; each action first requires the suffix to lie
// in R2, R1 or RV. Fails when no suffix matches or the region test fails.
define standard_suffix as (
[substring] among(

'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
'atrice' 'atrici'
'ante' 'anti' // Note 1
( R2 delete )
'azione' 'azioni' 'atore' 'atori'
// a preceding 'ic' lying in R2 is deleted as well
( R2 delete
try ( ['ic'] R2 delete )
)
'logia' 'logie'
( R2 <- 'log' )
'uzione' 'uzioni' 'usione' 'usioni'
( R2 <- 'u' )
'enza' 'enze'
( R2 <- 'ente' )
'amento' 'amenti' 'imento' 'imenti'
( RV delete )
'amente' (
R1 delete
// then optionally delete a further listed suffix lying in R2; after
// 'iv', a preceding 'at' in R2 is deleted too
try (
[substring] R2 delete among(
'iv' ( ['at'] R2 delete )
'os' 'ic' 'abil'
)
)
)
'it{a`}' (
R2 delete
try (
[substring] among(
'abil' 'ic' 'iv' (R2 delete)
)
)
)
'ivo' 'ivi' 'iva' 'ive' (
R2 delete
try ( ['at'] R2 delete ['ic'] R2 delete )
)
)
)

// Within the part of the word after pV, deletes the longest listed verb
// suffix.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
'ono' 'uta' 'ute' 'uti' 'uto'

'ar' 'ir' // but 'er' is problematical
(delete)
)
)

define AEIO 'aeio{a`}{e`}{i`}{o`}'
define CG 'cg'

// Optionally deletes a final AEIO vowel lying in RV and then a preceding 'i'
// lying in RV. Then optionally deletes a final 'h' that lies in RV and is
// preceded by 'c' or 'g'.
define vowel_suffix as (
try (
[AEIO] RV delete
['i'] RV delete
)
try (
['h'] CG RV delete
)
)
)

// Entry point. After prelude and mark_regions, works in backward mode:
// attached_pronoun, then standard_suffix or (if that fails) verb_suffix,
// then vowel_suffix; finally postlude restores the marker letters.
define stem as (
do prelude
do mark_regions
backwards (
do attached_pronoun
do (standard_suffix or verb_suffix)
do vowel_suffix
)
do postlude
)

/*
Note 1: additions of 15 Jun 2005
*/


+ 245
- 0
contrib/snowball/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,245 @@
strings ( ch )
integers ( x p1 p2 )
booleans ( Y_found stemmed GE_removed )

routines (

R1 R2
C V VX
lengthen_V
Step_1 Step_2 Step_3 Step_4 Step_7
Step_6 Step_1c
Lose_prefix
Lose_infix
measure
)

externals ( stem )

groupings ( v v_WX AOU AIOU )

stringescapes {}

stringdef ' hex '27' // yuk

// Character groupings used by the tests and steps below.
// v: the vowels, with 'y' included.
define v 'aeiouy'
// v_WX: the vowels plus 'w' and 'x' (used negated in lengthen_V).
define v_WX v + 'wx'
// AOU / AIOU: vowel subsets consulted by lengthen_V.
define AOU 'aou'
define AIOU 'aiou'

backwardmode (

// R1 / R2: store the current cursor in x and succeed only when that
// position is at or beyond mark p1 / p2 (both marks are set by `measure`).
define R1 as (setmark x $x >= p1)
define R2 as (setmark x $x >= p2)

// Non-consuming tests (each is wrapped in `test`, so the cursor is restored).
// These are defined in backward mode, so they look at what precedes the cursor.
// V:  a vowel or the digraph 'ij'.
// VX: skip one character, then a vowel or 'ij'.
// C:  not 'ij', and a non-vowel.
define V as test (v or 'ij')
define VX as test (next v or 'ij')
define C as test (not 'ij' non-v)

// lengthen_V: doubles a single vowel near the end of the word. Wrapped in
// `do`, so it never fails and never moves the cursor.
// Conditions, scanning backwards from the cursor:
//   - first a character that is not a vowel, 'w' or 'x';
//   - then either a/o/u that is itself preceded by a non-vowel or the limit,
//     or an 'e' that is preceded by a non-vowel or the limit, is not
//     preceded by a/i/o/u, and is not preceded by <a/i/o/u + non-vowel>
//     one character further back.
// The matched vowel is the slice; it is copied into ch and inserted again.
define lengthen_V as do (
non-v_WX [ (AOU] test (non-v or atlimit)) or
('e'] test (non-v or atlimit
not AIOU
not (next AIOU non-v)))
->ch insert ch
)

// Step_1: handles the endings 's, s, ies, es, aus, en and nde.
// The leading `(])` inside among is the among's starting command: it runs
// after a string has matched and before that string's own action, closing
// the slice opened by `[`. Several actions then re-close the slice with `]`
// after matching more letters, which extends the slice over those letters.
define Step_1 as
(
[among ( (])

'{'}s' (delete)
// 's': needs R1, must not follow a 't' that is in R1, and must follow a consonant
's' (R1 not ('t' R1) C delete)
'ies' (R1 <-'ie')
'es'
// 'ares' -> delete 'ares' and lengthen; 'eres' -> delete 'eres';
// otherwise 'es' -> 'e' after a consonant
(('ar' R1 C ] delete lengthen_V) or
('er' R1 C ] delete) or
(R1 C <-'e'))

'aus' (R1 V <-'au')
// 'en': alternatives are tried in order, the first that succeeds wins
'en' (('hed' R1 ] <-'heid') or
('nd' delete) or
('d' R1 C ] delete) or
('i' or 'j' V delete) or
(R1 C delete lengthen_V))
'nde' (<-'nd')
)
)

// Step_2: handles 'je' and a set of endings in final 'e'.
// As in Step_1, `(])` is the among's starting command and closes the slice
// after a match; a later `]` inside an action extends the slice over the
// extra letters matched by that action.
define Step_2 as
(
[among ( (])
// 'je': the alternatives are tried in order; most first match further
// letters before 'je' and fold them into the slice
'je' (('{'}t' ] delete) or
('et' ] R1 C delete) or
('rnt' ] <-'rn') or
('t' ] R1 VX delete) or
('ink' ] <-'ing') or
('mp' ] <-'m') or
('{'}' ] R1 delete) or
(] R1 C delete))
'ge' (R1 <-'g')
'lijke'(R1 <-'lijk')
'ische'(R1 <-'isch')
'de' (R1 C delete)
'te' (R1 <-'t')
'se' (R1 <-'s')
're' (R1 <-'r')
// the next two delete the ending, re-attach its consonant part and then
// let lengthen_V double the preceding vowel where its conditions hold
'le' (R1 delete attach 'l' lengthen_V)
'ene' (R1 C delete attach 'en' lengthen_V)
'ieve' (R1 C <-'ief')
)
)

// Step_3: derivational endings. Strings without an action of their own
// share the action of the next string that has one: heid/sel/ster use
// (R1 delete) and ing/isme/erij use (R1 delete lengthen_V).
define Step_3 as
(
[among ( (])
'atie' (R1 <-'eer')
'iteit' (R1 delete lengthen_V)
'heid'
'sel'
'ster' (R1 delete)
'rder' (<-'r')
'ing'
'isme'
'erij' (R1 delete lengthen_V)
'arij' (R1 C <-'aar')
// these two are the only rules in this file tested against R2
'fie' (R2 delete attach 'f' lengthen_V)
'gie' (R2 delete attach 'g' lengthen_V)
'tst' (R1 C <-'t')
'dst' (R1 C <-'d')
)
)

// Step_4: adjectival endings, in two among tables joined by `or`; the
// second table (iger / igst / ig) is consulted only when the first fails.
// Strings without an action share the action of the next string that has one.
define Step_4 as
(
( [among ( (])
'ioneel' (R1 <-'ie')
'atief' (R1 <-'eer')
'baar' (R1 delete)
'naar' (R1 V <-'n')
'laar' (R1 V <-'l')
'raar' (R1 V <-'r')
'tant' (R1 <-'teer')
'lijker'
'lijkst' (R1 <-'lijk')
'achtig'
'achtiger'
'achtigst'(R1 delete)
'eriger'
'erigst'
'erig'
'end' (R1 C delete lengthen_V)
)
)
or
( [among ( (])
'iger'
'igst'
'ig' (R1 C delete lengthen_V)
)
)
)

// Step_7: drops a final 't' after k, f or p (kt -> k, ft -> f, pt -> p).
// No region test is applied.
define Step_7 as
(
[among ( (])
'kt' (<-'k')
'ft' (<-'f')
'pt' (<-'p')
)
)

// Step_6: final clean-up. A doubled final consonant is reduced to a single
// one, and a single final 'v' / 'z' is rewritten to 'f' / 's'.
// No region test is applied.
define Step_6 as
(
[among ( (])
'bb' (<-'b')
'cc' (<-'c')
'dd' (<-'d')
'ff' (<-'f')
'gg' (<-'g')
'hh' (<-'h')
'jj' (<-'j')
'kk' (<-'k')
'll' (<-'l')
'mm' (<-'m')
'nn' (<-'n')
'pp' (<-'p')
'qq' (<-'q')
'rr' (<-'r')
'ss' (<-'s')
'tt' (<-'t')
'vv' (<-'v')
'ww' (<-'w')
'xx' (<-'x')
'zz' (<-'z')
'v' (<-'f')
'z' (<-'s')
)
)

// Step_1c: removes a final 'd' or 't'. `stem` calls it only after a 'ge'
// prefix or infix has been removed. The among's starting command here is
// `(] R1 C)`: after a match it closes the slice and additionally requires
// R1 and a preceding consonant before the per-string action runs.
define Step_1c as
(
[among ( (] R1 C)
// keep the 'd' when it follows an 'n' that is in R1
'd' (not ('n' R1) delete)
// keep the 't' when it follows an 'h' that is in R1
't' (not ('h' R1) delete)
)
)
)

// Lose_prefix: removes a leading 'ge'. The 'ge' is marked as the slice;
// `test hop 3` requires at least three more characters after it, and the
// remainder must contain a vowel followed later by a non-vowel. On success
// GE_removed is set and the slice is deleted.
define Lose_prefix as (
['ge'] test hop 3 (goto v goto non-v)
set GE_removed
delete
)

// Lose_infix: like Lose_prefix, but for a 'ge' inside the word. The first
// character is skipped with `next`, then the cursor moves past the next
// occurrence of 'ge', which becomes the slice. The same conditions on the
// remainder apply before GE_removed is set and the slice is deleted.
define Lose_infix as (
next
gopast (['ge']) test hop 3 (goto v goto non-v)
set GE_removed
delete
)

// measure: computes the marks p1 and p2 used by R1 and R2.
// The first `do` moves to the limit and sets both marks there, which gives
// the defaults when the word is too short; `do` then restores the cursor.
// The second `do` scans forwards: skip leading non-vowels, require at least
// one vowel (or 'ij'), then one non-vowel, and set p1; repeating the same
// pattern from there sets p2.
define measure as (
do (
tolimit
setmark p1
setmark p2
)
do(
repeat non-v atleast 1 ('ij' or v) non-v setmark p1
repeat non-v atleast 1 ('ij' or v) non-v setmark p2
)

)
// stem: the externally visible entry point.
//  1. An initial 'y', and any 'y' that follows a vowel, is rewritten to 'Y';
//     Y_found records that this happened.
//  2. measure sets p1/p2; it is not wrapped in `do`, so if it failed the
//     whole routine would fail.
//  3. Steps 1-4 run backwards; each one that succeeds sets `stemmed`.
//  4. A 'ge' prefix, then a 'ge' infix, is removed. Each removal re-runs
//     measure and, when GE_removed is set, Step_1c.
//  5. Step_7 runs; Step_6 runs only if something was stemmed or 'ge' was
//     removed in the infix pass (GE_removed is unset before that pass).
//  6. If Y_found is set, every 'Y' is turned back into 'y'.
define stem as (

unset Y_found
unset stemmed
do ( ['y'] <-'Y' set Y_found )
do repeat(goto (v ['y'])<-'Y' set Y_found )

measure

backwards (
do (Step_1 set stemmed )
do (Step_2 set stemmed )
do (Step_3 set stemmed )
do (Step_4 set stemmed )
)
unset GE_removed
do (Lose_prefix and measure)
backwards (
do (GE_removed Step_1c)
)
unset GE_removed
do (Lose_infix and measure)
backwards (
do (GE_removed Step_1c)
)
backwards (
do (Step_7 set stemmed )
do (stemmed or GE_removed Step_6)
)
do(Y_found repeat(goto (['Y']) <-'y'))
)


+ 208
- 0
contrib/snowball/algorithms/lovins/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,208 @@

stringescapes {}

routines (
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC

endings

undouble respell
)

externals ( stem )

backwardmode (

/* Lovins' conditions A, B ... CC, as given in her Appendix B, where
a test for a two letter prefix ('test hop 2') is implicitly
assumed. Note that 'e' next 'u' corresponds to her u*e because
Snowball is scanning backwards. */

// Reading guide: every condition starts with `hop n` or `test hop n`,
// which fails unless at least n characters remain before the cursor, i.e.
// it enforces a minimum stem length of n. What follows inspects the
// letters immediately before the ending: a string literal requires that
// letter sequence, `not '...'` forbids it. Inside a parenthesised sequence
// `or` joins only the commands adjacent to it.
define A as ( hop 2 )
define B as ( hop 3 )
define C as ( hop 4 )
define D as ( hop 5 )
define E as ( test hop 2 not 'e' )
define F as ( test hop 3 not 'e' )
define G as ( test hop 3 'f' )
define H as ( test hop 2 't' or 'll' )
define I as ( test hop 2 not 'o' not 'e' )
define J as ( test hop 2 not 'a' not 'e' )
define K as ( test hop 3 'l' or 'i' or ('e' next 'u') )
// L: not after u or x, and not after s unless that s follows an o
define L as ( test hop 2 not 'u' not 'x' not ('s' not 'o') )
define M as ( test hop 2 not 'a' not 'c' not 'e' not 'm' )
// N: minimum length 3; after skipping two letters, if the next letter is
// an 's' two more letters must be available
define N as ( test hop 3 ( hop 2 not 's' or hop 2 ) )
define O as ( test hop 2 'l' or 'i' )
define P as ( test hop 2 not 'c' )
// Q: both length tests apply, so the effective minimum length is 3
define Q as ( test hop 2 test hop 3 not 'l' not 'n' )
define R as ( test hop 2 'n' or 'r' )
// S: after 'dr', or after a 't' that does not itself follow a 't'
define S as ( test hop 2 'dr' or ('t' not 't') )
// T: after 's', or after a 't' that does not follow an 'o'
define T as ( test hop 2 's' or ('t' not 'o') )
define U as ( test hop 2 'l' or 'm' or 'n' or 'r' )
define V as ( test hop 2 'c' )
define W as ( test hop 2 not 's' not 'u' )
define X as ( test hop 2 'l' or 'i' or ('e' next 'u') )
define Y as ( test hop 2 'in' )
define Z as ( test hop 2 not 'f' )
define AA as ( test hop 2 among ( 'd' 'f' 'ph' 'th' 'l' 'er' 'or'
'es' 't' ) )
define BB as ( test hop 3 not 'met' not 'ryst' )
define CC as ( test hop 2 'l' )


/* The system of endings, as given in Appendix A. */

// endings: finds the longest ending from the table that terminates the
// word. Each ending is followed by the name of the condition routine
// (A .. CC) that must succeed for it; when it does, the shared (delete)
// action at the bottom removes the ending. The table is laid out by
// ending length, from 11 letters down to 1.
define endings as (
[substring] among(
'alistically' B 'arizability' A 'izationally' B

'antialness' A 'arisations' A 'arizations' A 'entialness' A

'allically' C 'antaneous' A 'antiality' A 'arisation' A
'arization' A 'ationally' B 'ativeness' A 'eableness' E
'entations' A 'entiality' A 'entialize' A 'entiation' A
'ionalness' A 'istically' A 'itousness' A 'izability' A
'izational' A

'ableness' A 'arizable' A 'entation' A 'entially' A
'eousness' A 'ibleness' A 'icalness' A 'ionalism' A
'ionality' A 'ionalize' A 'iousness' A 'izations' A
'lessness' A

'ability' A 'aically' A 'alistic' B 'alities' A
'ariness' E 'aristic' A 'arizing' A 'ateness' A
'atingly' A 'ational' B 'atively' A 'ativism' A
'elihood' E 'encible' A 'entally' A 'entials' A
'entiate' A 'entness' A 'fulness' A 'ibility' A
'icalism' A 'icalist' A 'icality' A 'icalize' A
'ication' G 'icianry' A 'ination' A 'ingness' A
'ionally' A 'isation' A 'ishness' A 'istical' A
'iteness' A 'iveness' A 'ivistic' A 'ivities' A
'ization' F 'izement' A 'oidally' A 'ousness' A

'aceous' A 'acious' B 'action' G 'alness' A
'ancial' A 'ancies' A 'ancing' B 'ariser' A
'arized' A 'arizer' A 'atable' A 'ations' B
'atives' A 'eature' Z 'efully' A 'encies' A
'encing' A 'ential' A 'enting' C 'entist' A
'eously' A 'ialist' A 'iality' A 'ialize' A
'ically' A 'icance' A 'icians' A 'icists' A
'ifully' A 'ionals' A 'ionate' D 'ioning' A
'ionist' A 'iously' A 'istics' A 'izable' E
'lessly' A 'nesses' A 'oidism' A

'acies' A 'acity' A 'aging' B 'aical' A
'alist' A 'alism' B 'ality' A 'alize' A
'allic'BB 'anced' B 'ances' B 'antic' C
'arial' A 'aries' A 'arily' A 'arity' B
'arize' A 'aroid' A 'ately' A 'ating' I
'ation' B 'ative' A 'ators' A 'atory' A
'ature' E 'early' Y 'ehood' A 'eless' A
'elity' A 'ement' A 'enced' A 'ences' A
'eness' E 'ening' E 'ental' A 'ented' C
'ently' A 'fully' A 'ially' A 'icant' A
'ician' A 'icide' A 'icism' A 'icist' A
'icity' A 'idine' I 'iedly' A 'ihood' A
'inate' A 'iness' A 'ingly' B 'inism' J
'inity'CC 'ional' A 'ioned' A 'ished' A
'istic' A 'ities' A 'itous' A 'ively' A
'ivity' A 'izers' F 'izing' F 'oidal' A
'oides' A 'otide' A 'ously' A

'able' A 'ably' A 'ages' B 'ally' B
'ance' B 'ancy' B 'ants' B 'aric' A
'arly' K 'ated' I 'ates' A 'atic' B
'ator' A 'ealy' Y 'edly' E 'eful' A
'eity' A 'ence' A 'ency' A 'ened' E
'enly' E 'eous' A 'hood' A 'ials' A
'ians' A 'ible' A 'ibly' A 'ical' A
'ides' L 'iers' A 'iful' A 'ines' M
'ings' N 'ions' B 'ious' A 'isms' B
'ists' A 'itic' H 'ized' F 'izer' F
'less' A 'lily' A 'ness' A 'ogen' A
'ward' A 'wise' A 'ying' B 'yish' A

'acy' A 'age' B 'aic' A 'als'BB
'ant' B 'ars' O 'ary' F 'ata' A
'ate' A 'eal' Y 'ear' Y 'ely' E
'ene' E 'ent' C 'ery' E 'ese' A
'ful' A 'ial' A 'ian' A 'ics' A
'ide' L 'ied' A 'ier' A 'ies' P
'ily' A 'ine' M 'ing' N 'ion' Q
'ish' C 'ism' B 'ist' A 'ite'AA
'ity' A 'ium' A 'ive' A 'ize' F
'oid' A 'one' R 'ous' A

'ae' A 'al'BB 'ar' X 'as' B
'ed' E 'en' F 'es' E 'ia' A
'ic' A 'is' A 'ly' B 'on' S
'or' T 'um' U 'us' V 'yl' R
'{'}s' A 's{'}' A

'a' A 'e' A 'i' A 'o' A
's' W 'y' B

(delete)
)
)

/* Undoubling is rule 1 of appendix C. */

// undouble: if the word ends in one of the listed doubled consonants,
// delete its last letter. The among is wrapped in `test`, so the cursor is
// back at the end of the word when `[next]` marks the final character.
define undouble as (
test substring among ('bb' 'dd' 'gg' 'll' 'mm' 'nn' 'pp' 'rr' 'ss'
'tt')
[next] delete
)

/* The other appendix C rules can be done together. */

// respell: rewrites the end of the stem. The longest matching ending in
// the table is replaced by the string given in its action. Actions that
// begin with `not '...'` leave the ending alone when it is preceded by one
// of those letters.
define respell as (
[substring] among (
'iev' (<-'ief')
'uct' (<-'uc')
'umpt' (<-'um')
'rpt' (<-'rb')
'urs' (<-'ur')
'istr' (<-'ister')
'metr' (<-'meter')
'olv' (<-'olut')
// 'ul' -> 'l', except after a, i or o
'ul' (not 'a' not 'i' not 'o' <-'l')
'bex' (<-'bic')
'dex' (<-'dic')
'pex' (<-'pic')
'tex' (<-'tic')
'ax' (<-'ac')
'ex' (<-'ec')
'ix' (<-'ic')
'lux' (<-'luc')
'uad' (<-'uas')
'vad' (<-'vas')
'cid' (<-'cis')
'lid' (<-'lis')
'erid' (<-'eris')
'pand' (<-'pans')
// 'end' -> 'ens', except after s
'end' (not 's' <-'ens')
'ond' (<-'ons')
'lud' (<-'lus')
'rud' (<-'rus')
// 'her' -> 'hes', except after p or t
'her' (not 'p' not 't' <-'hes')
'mit' (<-'mis')
// 'ent' -> 'ens', except after m
'ent' (not 'm' <-'ens')
/* 'ent' was 'end' in the 1968 paper - a typo. */
'ert' (<-'ers')
// 'et' -> 'es', except after n
'et' (not 'n' <-'es')
'yt' (<-'ys')
'yz' (<-'ys')
)
)
)

// stem: the externally visible entry point. The three passes run backwards
// from the end of the word, each wrapped in `do` so that a pass that does
// not apply never stops the later ones.
define stem as (

backwards (
do endings
do undouble
do respell
)
)


+ 80
- 0
contrib/snowball/algorithms/norwegian/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,80 @@
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)

externals ( stem )

integers ( p1 x )

groupings ( v s_ending )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef ae hex 'E6'
stringdef ao hex 'E5'
stringdef o/ hex 'F8'

// v: the vowels, including the three special characters defined above.
define v 'aeiouy{ae}{ao}{o/}'

// s_ending: the letters after which main_suffix deletes a final 's'.
define s_ending 'bcdfghjlmnoprtvyz'

// mark_regions: sets p1, the mark that bounds where suffixes may be removed.
//  - p1 defaults to the limit (end of word).
//  - x is set to the position three characters in; if the word is shorter
//    than that, `hop 3` fails and the routine fails with p1 at the default.
//  - p1 becomes the position after the first non-vowel that follows a vowel.
//  - finally p1 is raised to x if it was smaller.
define mark_regions as (

$p1 = limit

test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)

backwardmode (

// main_suffix: the suffix search runs with the limit set to p1, so only an
// ending that lies entirely after p1 can match; the selected action then
// runs without that restriction.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(

'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
'hetens' 'ers' 'ets' 'et' 'het' 'ast'
(delete)
's'
// delete 's' only after an s_ending letter, or after a 'k' that is
// itself preceded by a non-vowel
(s_ending or ('k' non-v) delete)
'erte' 'ert'
(<-'er')
)
)

// consonant_pair: if the word ends in 'dt' or 'vt' after p1, delete the
// final 't'. The match is wrapped in `test`, so the cursor is back at the
// end of the word afterwards; `next]` then steps over one character and
// closes the slice there, leaving only the last letter to delete.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'dt' 'vt'
)
)
next] delete
)

// other_suffix: deletes the longest of the listed endings, provided it
// lies entirely after p1.
define other_suffix as (
setlimit tomark p1 for ([substring])
among(
'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
'hetslov'
(delete)
)
)
)

// stem: the externally visible entry point. mark_regions runs forwards;
// the three suffix passes run backwards from the end of the word. Every
// step is wrapped in `do`, so a failing step does not stop the others.
define stem as (

do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)

+ 80
- 0
contrib/snowball/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,80 @@
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)

externals ( stem )

integers ( p1 x )

groupings ( v s_ending )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef ae hex '91'
stringdef ao hex '86'
stringdef o/ hex '9B'

// v: the vowels, including the three special characters defined above.
define v 'aeiouy{ae}{ao}{o/}'

// s_ending: the letters after which main_suffix deletes a final 's'.
define s_ending 'bcdfghjlmnoprtvyz'

// mark_regions: sets p1, the mark that bounds where suffixes may be removed.
//  - p1 defaults to the limit (end of word).
//  - x is set to the position three characters in; if the word is shorter
//    than that, `hop 3` fails and the routine fails with p1 at the default.
//  - p1 becomes the position after the first non-vowel that follows a vowel.
//  - finally p1 is raised to x if it was smaller.
define mark_regions as (

$p1 = limit

test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)

backwardmode (

// main_suffix: the suffix search runs with the limit set to p1, so only an
// ending that lies entirely after p1 can match; the selected action then
// runs without that restriction.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(

'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
'hetens' 'ers' 'ets' 'et' 'het' 'ast'
(delete)
's'
// delete 's' only after an s_ending letter, or after a 'k' that is
// itself preceded by a non-vowel
(s_ending or ('k' non-v) delete)
'erte' 'ert'
(<-'er')
)
)

// consonant_pair: if the word ends in 'dt' or 'vt' after p1, delete the
// final 't'. The match is wrapped in `test`, so the cursor is back at the
// end of the word afterwards; `next]` then steps over one character and
// closes the slice there, leaving only the last letter to delete.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'dt' 'vt'
)
)
next] delete
)

// other_suffix: deletes the longest of the listed endings, provided it
// lies entirely after p1.
define other_suffix as (
setlimit tomark p1 for ([substring])
among(
'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
'hetslov'
(delete)
)
)
)

// stem: the externally visible entry point. mark_regions runs forwards;
// the three suffix passes run backwards from the end of the word. Every
// step is wrapped in `do`, so a failing step does not stop the others.
define stem as (

do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)

+ 139
- 0
contrib/snowball/algorithms/porter/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,139 @@
integers ( p1 p2 )
booleans ( Y_found )

routines (
shortv
R1 R2
Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5a Step_5b
)

externals ( stem )

groupings ( v v_WXY )

// v: the vowels. Lower-case 'y' is listed here; `stem` rewrites a 'y' that
// should count as a consonant to 'Y' before any step runs.
define v 'aeiouy'
// v_WXY: the vowels plus w, x and Y (used negated in shortv).
define v_WXY v + 'wxY'

backwardmode (

// shortv: scanning backwards, matches a character that is not a vowel,
// w, x or Y, then a vowel, then a non-vowel.
define shortv as ( non-v_WXY v non-v )

// R1 / R2: succeed when the cursor is at or after mark p1 / p2, i.e. when
// everything matched so far lies inside that region.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// Step_1a: plural endings. The longest match wins, so 'ss' (whose empty
// action leaves the word unchanged) shields words ending in 'ss' from the
// plain 's' rule.
define Step_1a as (
[substring] among (
'sses' (<-'ss')
'ies' (<-'i')
'ss' ()
's' (delete)
)
)

// Step_1b: the endings eed / ed / ing.
//  - 'eed' becomes 'ee' when R1 succeeds.
//  - 'ed' and 'ing' are deleted only if a vowel occurs earlier in the word
//    (`test gopast v`). After the deletion the new ending is examined:
//      at / bl / iz      -> insert an 'e'
//      doubled consonant -> delete the last letter
//      anything else ('') -> insert an 'e' if the cursor is exactly at p1
//                            and shortv matches
define Step_1b as (
[substring] among (
'eed' (R1 <-'ee')
'ed'
'ing' (
test gopast v delete
test substring among(
'at' 'bl' 'iz'
(<+ 'e')
'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
// ignoring double c, h, j, k, q, v, w, and x
([next] delete)
'' (atmark p1 test shortv <+ 'e')
)
)
)
)

// Step_1c: a final 'y' or 'Y' becomes 'i', provided a vowel occurs
// somewhere before it (`gopast v` must succeed).
define Step_1c as (
['y' or 'Y']
gopast v
<-'i'
)

// Step_2: maps double suffixes to single ones. R1 is tested once, between
// `substring` and `among`, so the whole matched ending must lie in R1
// before any replacement is made. Strings without an action of their own
// share the action of the next string that has one.
define Step_2 as (
[substring] R1 among (
'tional' (<-'tion')
'enci' (<-'ence')
'anci' (<-'ance')
'abli' (<-'able')
'entli' (<-'ent')
'eli' (<-'e')
'izer' 'ization'
(<-'ize')
'ational' 'ation' 'ator'
(<-'ate')
'alli' (<-'al')
'alism' 'aliti'
(<-'al')
'fulness' (<-'ful')
'ousli' 'ousness'
(<-'ous')
'iveness' 'iviti'
(<-'ive')
'biliti' (<-'ble')
)
)

// Step_3: further suffix simplification; as in Step_2 the matched ending
// must lie in R1.
define Step_3 as (
[substring] R1 among (
'alize' (<-'al')
'icate' 'iciti' 'ical'
(<-'ic')
'ative' 'ful' 'ness'
(delete)
)
)

// Step_4: deletes a residual suffix that lies in R2. 'ion' is deleted only
// when it is preceded by 's' or 't'.
define Step_4 as (
[substring] R2 among (
'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
'ment' 'ent' 'ou' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
(delete)
'ion' ('s' or 't' delete)
)
)

// Step_5a: deletes a final 'e' if it is in R2, or if it is in R1 and is
// not preceded by a shortv sequence.
define Step_5a as (
['e']
R2 or (R1 not shortv)
delete
)

// Step_5b: deletes a final 'l' when it is in R2 and preceded by another
// 'l' (so a final 'll' is reduced to 'l').
define Step_5b as (
['l']
R2 'l'
delete
)
)

// stem: the externally visible entry point.
//  1. An initial 'y', and any 'y' that follows a vowel, is rewritten to 'Y'
//     so that it is not in the vowel grouping; Y_found records this.
//  2. p1 and p2 default to the limit. p1 is then set after the first
//     non-vowel that follows a vowel, and p2 the same way starting from p1.
//  3. The steps run backwards, each inside `do` so that a failing step
//     does not stop the later ones.
//  4. If Y_found is set, every 'Y' is turned back into 'y'.
define stem as (

unset Y_found
do ( ['y'] <-'Y' set Y_found)
do repeat(goto (v ['y']) <-'Y' set Y_found)

$p1 = limit
$p2 = limit
do(
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)

backwards (
do Step_1a
do Step_1b
do Step_1c
do Step_2
do Step_3
do Step_4
do Step_5a
do Step_5b
)

do(Y_found repeat(goto (['Y']) <-'y'))

)

+ 218
- 0
contrib/snowball/algorithms/portuguese/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,218 @@
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
verb_suffix
residual_suffix
residual_form
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a' hex 'E1' // a-acute
stringdef a^ hex 'E2' // a-circumflex e.g. 'bota^nico
stringdef e' hex 'E9' // e-acute
stringdef e^ hex 'EA' // e-circumflex
stringdef i' hex 'ED' // i-acute
stringdef o^ hex 'F4' // o-circumflex
stringdef o' hex 'F3' // o-acute
stringdef u' hex 'FA' // u-acute
stringdef c, hex 'E7' // c-cedilla

stringdef a~ hex 'E3' // a-tilde
stringdef o~ hex 'F5' // o-tilde


// v: the vowels - plain, acute-accented and circumflex-accented. The tilde
// vowels are not listed; prelude rewrites them to 'a~' / 'o~'.
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'

// prelude: walks the whole word, replacing a-tilde and o-tilde with the
// two-character sequences 'a~' and 'o~'. The empty string matches
// everywhere else and simply advances one character; the repeat ends when
// `next` fails at the end of the word.
define prelude as repeat (
[substring] among(
'{a~}' (<- 'a~')
'{o~}' (<- 'o~')
'' (next)
) //or next
)

// mark_regions: sets pV, p1 and p2, all defaulting to the limit.
// pV, depending on the first two letters:
//   vowel + non-vowel      -> after the next vowel
//   vowel + vowel          -> after the next non-vowel
//   non-vowel + non-vowel  -> after the next vowel
//   non-vowel + vowel      -> after the third letter
// p1 is set after the first non-vowel that follows a vowel; p2 is found
// the same way, starting from p1.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// postlude: the inverse of prelude - turns 'a~' and 'o~' back into the
// single a-tilde / o-tilde characters, advancing one character elsewhere.
define postlude as repeat (
[substring] among(
'a~' (<- '{a~}')
'o~' (<- '{o~}')
'' (next)
) //or next
)

backwardmode (

// RV / R1 / R2: succeed when the cursor is at or after mark pV / p1 / p2,
// i.e. when everything matched so far lies inside that region.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// standard_suffix: finds the longest string from the table below that ends
// the word, marks it as the slice, and runs the action that follows its
// group. An action fails, and with it the routine, when its region test
// (R1 / R2 / RV) fails.
//
// Tilde vowels appear here as 'a~' / 'o~' because prelude has already
// rewritten them.
define standard_suffix as (
[substring] among(

'eza' 'ezas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'{a'}vel'
'{i'}vel'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amento' 'amentos'
'imento' 'imentos'

'adora' 'ador' 'a{c,}a~o'
'adoras' 'adores' 'a{c,}o~es' // no -ic test
'ante' 'antes' '{a^}ncia' // Note 1
(
R2 delete
)
// Portuguese spelling has no accent in -logia / -logias. The accented
// Spanish forms previously listed here never occur in Portuguese text,
// so the rule could not fire.
'logia'
'logias'
(
R2 <- 'log'
)
// Portuguese -u(c-cedilla)(a-tilde)o / -u(c-cedilla)(o-tilde)es,
// written with the prelude's 'a~' / 'o~' like the -a{c,}a~o entries
// above. These replace the Spanish -ucion / -uciones that were listed.
'u{c,}a~o' 'u{c,}o~es'
(
R2 <- 'u'
)
'{e^}ncia' '{e^}ncias'
(
R2 <- 'ente'
)
'amente'
(
// delete if in R1; then try to strip iv / os / ic / ad in R2, and for
// 'iv' a further preceding 'at'
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
// delete if in R2; then a preceding ante / avel / ivel is deleted if in R2
R2 delete
try (
[substring] among(
'ante' // Note 1
'avel'
'{i'}vel' (R2 delete)
)
)
)
'idade'
'idades'
(
// delete if in R2; then a preceding abil / ic / iv is deleted if in R2
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
'ira' 'iras'
(
RV 'e' // -eira -eiras usually non-verbal
<- 'ir'
)
)
)

// verb_suffix: with the limit temporarily moved to mark pV, finds the
// longest verb ending from the table that ends the word and deletes it.
// Every entry shares the single (delete) action at the end of the table.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
'{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
'{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
'{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
'{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'

'ira' 'iras'
(delete)
)
)

// residual_suffix: deletes a final 'os' or one of the listed final vowels
// when it lies in RV. `stem` calls it only when neither standard_suffix
// nor verb_suffix succeeded.
define residual_suffix as (
[substring] among(
'os'
'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
)
)

// residual_form: last adjustment of the stem.
//  - A final e / e-acute / e-circumflex in RV is deleted. The action then
//    requires that the word now ends in 'gu' or 'ci', and deletes that 'u'
//    or 'i' if it is in RV. If the second part does not apply the action
//    fails, but the first deletion has already been made and is kept.
//  - A final c-cedilla is replaced by 'c'.
define residual_form as (
[substring] among(
'e' '{e'}' '{e^}'
( RV delete [('u'] test 'g') or
('i'] test 'c') RV delete )
'{c,}' (<-'c')
)
)
)

// stem: the externally visible entry point.
// Inside `backwards`:
//  - standard_suffix is tried, then verb_suffix if it fails. If either
//    succeeds, a final 'i' that follows a 'c' and lies in RV is deleted.
//  - if both fail, residual_suffix is tried instead.
//  - residual_form always runs afterwards.
// prelude / postlude convert the tilde vowels to and from their
// two-character working form.
define stem as (
do prelude
do mark_regions
backwards (
do (
( ( standard_suffix or verb_suffix )
and do ( ['i'] test 'c' RV delete )
)
or residual_suffix
)
do residual_form
)
do postlude
)

/*
Note 1: additions of 15 Jun 2005
*/

+ 218
- 0
contrib/snowball/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,218 @@
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
verb_suffix
residual_suffix
residual_form
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef a' hex 'A0' // a-acute
stringdef a^ hex '83' // a-circumflex e.g. 'bota^nico
stringdef e' hex '82' // e-acute
stringdef e^ hex '88' // e-circumflex
stringdef i' hex 'A1' // i-acute
stringdef o^ hex '93' // o-circumflex
stringdef o' hex 'A2' // o-acute
stringdef u' hex 'A3' // u-acute
stringdef c, hex '87' // c-cedilla

stringdef a~ hex 'C6' // a-tilde
stringdef o~ hex 'E4' // o-tilde


// v: the vowels - plain, acute-accented and circumflex-accented. The tilde
// vowels are not listed; prelude rewrites them to 'a~' / 'o~'.
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'

// prelude: walks the whole word, replacing a-tilde and o-tilde with the
// two-character sequences 'a~' and 'o~'. The empty string matches
// everywhere else and simply advances one character; the repeat ends when
// `next` fails at the end of the word.
define prelude as repeat (
[substring] among(
'{a~}' (<- 'a~')
'{o~}' (<- 'o~')
'' (next)
) //or next
)

// mark_regions: sets pV, p1 and p2, all defaulting to the limit.
// pV, depending on the first two letters:
//   vowel + non-vowel      -> after the next vowel
//   vowel + vowel          -> after the next non-vowel
//   non-vowel + non-vowel  -> after the next vowel
//   non-vowel + vowel      -> after the third letter
// p1 is set after the first non-vowel that follows a vowel; p2 is found
// the same way, starting from p1.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// postlude: the inverse of prelude - turns 'a~' and 'o~' back into the
// single a-tilde / o-tilde characters, advancing one character elsewhere.
define postlude as repeat (
[substring] among(
'a~' (<- '{a~}')
'o~' (<- '{o~}')
'' (next)
) //or next
)

backwardmode (

// RV / R1 / R2: succeed when the cursor is at or after mark pV / p1 / p2,
// i.e. when everything matched so far lies inside that region.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// standard_suffix: finds the longest string from the table below that ends
// the word, marks it as the slice, and runs the action that follows its
// group. An action fails, and with it the routine, when its region test
// (R1 / R2 / RV) fails.
//
// Tilde vowels appear here as 'a~' / 'o~' because prelude has already
// rewritten them.
define standard_suffix as (
[substring] among(

'eza' 'ezas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'{a'}vel'
'{i'}vel'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amento' 'amentos'
'imento' 'imentos'

'adora' 'ador' 'a{c,}a~o'
'adoras' 'adores' 'a{c,}o~es' // no -ic test
'ante' 'antes' '{a^}ncia' // Note 1
(
R2 delete
)
// Portuguese spelling has no accent in -logia / -logias. The accented
// Spanish forms previously listed here never occur in Portuguese text,
// so the rule could not fire.
'logia'
'logias'
(
R2 <- 'log'
)
// Portuguese -u(c-cedilla)(a-tilde)o / -u(c-cedilla)(o-tilde)es,
// written with the prelude's 'a~' / 'o~' like the -a{c,}a~o entries
// above. These replace the Spanish -ucion / -uciones that were listed.
'u{c,}a~o' 'u{c,}o~es'
(
R2 <- 'u'
)
'{e^}ncia' '{e^}ncias'
(
R2 <- 'ente'
)
'amente'
(
// delete if in R1; then try to strip iv / os / ic / ad in R2, and for
// 'iv' a further preceding 'at'
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
// delete if in R2; then a preceding ante / avel / ivel is deleted if in R2
R2 delete
try (
[substring] among(
'ante' // Note 1
'avel'
'{i'}vel' (R2 delete)
)
)
)
'idade'
'idades'
(
// delete if in R2; then a preceding abil / ic / iv is deleted if in R2
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
'ira' 'iras'
(
RV 'e' // -eira -eiras usually non-verbal
<- 'ir'
)
)
)

// verb_suffix: with the limit temporarily moved to mark pV, finds the
// longest verb ending from the table that ends the word and deletes it.
// Every entry shares the single (delete) action at the end of the table.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
'{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
'{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
'{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
'{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'

'ira' 'iras'
(delete)
)
)

// residual_suffix: deletes a final 'os' or one of the listed final vowels
// when it lies in RV. `stem` calls it only when neither standard_suffix
// nor verb_suffix succeeded.
define residual_suffix as (
[substring] among(
'os'
'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
)
)

// residual_form: last adjustment of the stem.
//  - A final e / e-acute / e-circumflex in RV is deleted. The action then
//    requires that the word now ends in 'gu' or 'ci', and deletes that 'u'
//    or 'i' if it is in RV. If the second part does not apply the action
//    fails, but the first deletion has already been made and is kept.
//  - A final c-cedilla is replaced by 'c'.
define residual_form as (
[substring] among(
'e' '{e'}' '{e^}'
( RV delete [('u'] test 'g') or
('i'] test 'c') RV delete )
'{c,}' (<-'c')
)
)
)

// stem: the externally visible entry point.
// Inside `backwards`:
//  - standard_suffix is tried, then verb_suffix if it fails. If either
//    succeeds, a final 'i' that follows a 'c' and lies in RV is deleted.
//  - if both fail, residual_suffix is tried instead.
//  - residual_form always runs afterwards.
// prelude / postlude convert the tilde vowels to and from their
// two-character working form.
define stem as (
do prelude
do mark_regions
backwards (
do (
( ( standard_suffix or verb_suffix )
and do ( ['i'] test 'c' RV delete )
)
or residual_suffix
)
do residual_form
)
do postlude
)

/*
Note 1: additions of 15 Jun 2005
*/

+ 236
- 0
contrib/snowball/algorithms/romanian/stem_ISO_8859_2.sbl View File

@@ -0,0 +1,236 @@

routines (
prelude postlude mark_regions
RV R1 R2
step_0
standard_suffix combo_suffix
verb_suffix
vowel_suffix
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v )

booleans ( standard_suffix_removed )

stringescapes {}

/* special characters */

stringdef a^ hex 'E2' // a circumflex
stringdef i^ hex 'EE' // i circumflex
stringdef a+ hex 'E3' // a breve
stringdef s, hex 'BA' // s cedilla
stringdef t, hex 'FE' // t cedilla

// v: the vowels, including a-circumflex, i-circumflex and a-breve.
define v 'aeiou{a^}{i^}{a+}'

// prelude: a 'u' or 'i' that stands between two vowels is rewritten to
// upper case ('U' / 'I'), which takes it out of the vowel grouping for the
// rest of the algorithm. postlude reverses this.
define prelude as (
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)

// mark_regions: sets pV, p1 and p2, all defaulting to the limit.
// pV, depending on the first two letters:
//   vowel + non-vowel      -> after the next vowel
//   vowel + vowel          -> after the next non-vowel
//   non-vowel + non-vowel  -> after the next vowel
//   non-vowel + vowel      -> after the third letter
// p1 is set after the first non-vowel that follows a vowel; p2 is found
// the same way, starting from p1.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// postlude: turns the 'I' and 'U' introduced by prelude back into 'i' and
// 'u'; the empty string matches elsewhere and advances one character.
define postlude as repeat (

[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)

)

backwardmode (

// RV / R1 / R2: succeed when the cursor is at or after mark pV / p1 / p2,
// i.e. when everything matched so far lies inside that region.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// step_0: removes or normalises the endings listed below. The matched
// ending must lie in R1 (tested once, before the action is selected).
define step_0 as (
[substring] R1 among(
'ul' 'ului'
( delete )
'aua'
( <-'a' )
'ea' 'ele' 'elor'
( <-'e' )
'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
( <-'i')
'ile'
// 'ile' -> 'i', except when preceded by 'ab'
( not 'ab' <- 'i' )
'atei'
( <- 'at' )
'a{t,}ie' 'a{t,}ia'
( <- 'a{t,}i' )
)
)

// combo_suffix: replaces a compound suffix lying in R1 by a shorter one
// and sets standard_suffix_removed. The body is wrapped in `test`, so the
// cursor returns to where it started; standard_suffix calls this under
// `repeat` until no compound suffix matches any more.
define combo_suffix as test (
[substring] R1 (
among(
/* 'IST'. alternative: include the following
'alism' 'alisme'
'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
<- 'al'
)
*/
'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
<- 'abil'
)
'ibilitate' (
<- 'ibil'
)
'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
<- 'iv'
)
'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
'icator' 'icatori'
'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
<- 'ic'
)
'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
'atoare' 'ator' 'atori'
'{a+}toare' '{a+}tor' '{a+}tori' (
<- 'at'
)
'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
'itoare' 'itor' 'itori' (
<- 'it'
)
)
set standard_suffix_removed
)
)

// standard_suffix: clears the standard_suffix_removed flag, applies
// combo_suffix repeatedly, and then handles one simple suffix that lies in
// R2. The flag is set again whenever combo_suffix or the among below
// succeeds; `stem` uses it to decide whether verb_suffix should run.
define standard_suffix as (
unset standard_suffix_removed
repeat combo_suffix
[substring] R2 (
among(

// past participle is treated here, rather than
// as a verb ending:
'at' 'ata' 'at{a+}' 'ati' 'ate'
'ut' 'uta' 'ut{a+}' 'uti' 'ute'
'it' 'ita' 'it{a+}' 'iti' 'ite'

'ic' 'ica' 'ice' 'ici' 'ic{a+}'
'abil' 'abila' 'abile' 'abili' 'abil{a+}'
'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
'ant' 'anta' 'ante' 'anti' 'ant{a+}'
'ator' 'atori'
'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
delete
)
'iune' 'iuni' (
// only when preceded by t-cedilla: the slice is extended over it and
// the whole thing is replaced by a plain 't'
'{t,}'] <- 't'
)
'ism' 'isme'
'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
<- 'ist'
/* 'IST'. alternative: remove with <- '' */
)
)
set standard_suffix_removed
)
)

// verb_suffix: with the limit temporarily moved to mark pV, finds the
// longest verb ending that ends the word. The table has two groups:
//   - the first (down to the `non-v or 'u'` action) is deleted only when
//     the ending is preceded by a non-vowel or by 'u';
//   - the second is deleted unconditionally.
define verb_suffix as setlimit tomark pV for (
[substring] among(
// 'long' infinitive:
'are' 'ere' 'ire' '{a^}re'

// gerund:
'ind' '{a^}nd'
'indu' '{a^}ndu'

'eze'
'easc{a+}'
// present:
'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
'e{s,}te'
'{a+}sc' '{a+}{s,}ti'
'{a+}{s,}te'

// imperfect:
'am' 'ai' 'au'
'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
'iam' 'iai' 'ia' 'ia{t,}i' 'iau'

// past: // (not 'ii')
'ui'
'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
'{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'

// pluperfect:
'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
'{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
'{a^}ser{a+}'
'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'

( non-v or 'u' delete )

// present:
'{a+}m' 'a{t,}i'
'em' 'e{t,}i'
'im' 'i{t,}i'
'{a^}m' '{a^}{t,}i'

// past:
'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
'sei' 'se'

// pluperfect:
'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
(delete)
)
)

// vowel_suffix: deletes a final a, e, i, ie or a-breve when it lies in RV.
define vowel_suffix as (
[substring] RV among (
'a' 'e' 'i' 'ie' '{a+}' ( delete )
)
)
)

// stem: the externally visible entry point. Every step is wrapped in `do`.
// Inside `backwards`, verb_suffix runs only when standard_suffix left the
// standard_suffix_removed flag unset; vowel_suffix always runs last.
define stem as (
do prelude
do mark_regions
backwards (
do step_0
do standard_suffix
do ( standard_suffix_removed or verb_suffix )
do vowel_suffix
)
do postlude
)


+ 236
- 0
contrib/snowball/algorithms/romanian/stem_Unicode.sbl View File

@@ -0,0 +1,236 @@

routines (
prelude postlude mark_regions
RV R1 R2
step_0
standard_suffix combo_suffix
verb_suffix
vowel_suffix
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v )

booleans ( standard_suffix_removed )

stringescapes {}

/* special characters */

stringdef a^ hex '0E2' // a circumflex
stringdef i^ hex '0EE' // i circumflex
stringdef a+ hex '103' // a breve
stringdef s, hex '15F' // s cedilla
stringdef t, hex '163' // t cedilla

// v: the vowels, including a-circumflex, i-circumflex and a-breve.
define v 'aeiou{a^}{i^}{a+}'

// prelude: a 'u' or 'i' that stands between two vowels is rewritten to
// upper case ('U' / 'I'), which takes it out of the vowel grouping for the
// rest of the algorithm. postlude reverses this.
define prelude as (
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)

// mark_regions: sets pV, p1 and p2, all defaulting to the limit.
// pV, depending on the first two letters:
//   vowel + non-vowel      -> after the next vowel
//   vowel + vowel          -> after the next non-vowel
//   non-vowel + non-vowel  -> after the next vowel
//   non-vowel + vowel      -> after the third letter
// p1 is set after the first non-vowel that follows a vowel; p2 is found
// the same way, starting from p1.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// postlude: turns the 'I' and 'U' introduced by prelude back into 'i' and
// 'u'; the empty string matches elsewhere and advances one character.
define postlude as repeat (

[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)

)

backwardmode (

// RV / R1 / R2: succeed when the cursor is at or after mark pV / p1 / p2,
// i.e. when everything matched so far lies inside that region.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// step_0: removes or normalises the endings listed below. The matched
// ending must lie in R1 (tested once, before the action is selected).
define step_0 as (
[substring] R1 among(
'ul' 'ului'
( delete )
'aua'
( <-'a' )
'ea' 'ele' 'elor'
( <-'e' )
'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
( <-'i')
'ile'
// 'ile' -> 'i', except when preceded by 'ab'
( not 'ab' <- 'i' )
'atei'
( <- 'at' )
'a{t,}ie' 'a{t,}ia'
( <- 'a{t,}i' )
)
)

// combo_suffix: replaces a compound suffix lying in R1 by a shorter one
// and sets standard_suffix_removed. The body is wrapped in `test`, so the
// cursor returns to where it started; standard_suffix calls this under
// `repeat` until no compound suffix matches any more.
define combo_suffix as test (
[substring] R1 (
among(
/* 'IST'. alternative: include the following
'alism' 'alisme'
'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
<- 'al'
)
*/
'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
<- 'abil'
)
'ibilitate' (
<- 'ibil'
)
'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
<- 'iv'
)
'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
'icator' 'icatori'
'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
<- 'ic'
)
'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
'atoare' 'ator' 'atori'
'{a+}toare' '{a+}tor' '{a+}tori' (
<- 'at'
)
'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
'itoare' 'itor' 'itori' (
<- 'it'
)
)
set standard_suffix_removed
)
)

// standard_suffix: clears the standard_suffix_removed flag, applies
// combo_suffix repeatedly, and then handles one simple suffix that lies in
// R2. The flag is set again whenever combo_suffix or the among below
// succeeds; `stem` uses it to decide whether verb_suffix should run.
define standard_suffix as (
unset standard_suffix_removed
repeat combo_suffix
[substring] R2 (
among(

// past participle is treated here, rather than
// as a verb ending:
'at' 'ata' 'at{a+}' 'ati' 'ate'
'ut' 'uta' 'ut{a+}' 'uti' 'ute'
'it' 'ita' 'it{a+}' 'iti' 'ite'

'ic' 'ica' 'ice' 'ici' 'ic{a+}'
'abil' 'abila' 'abile' 'abili' 'abil{a+}'
'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
'ant' 'anta' 'ante' 'anti' 'ant{a+}'
'ator' 'atori'
'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
delete
)
'iune' 'iuni' (
// only when preceded by t-cedilla: the slice is extended over it and
// the whole thing is replaced by a plain 't'
'{t,}'] <- 't'
)
'ism' 'isme'
'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
<- 'ist'
/* 'IST'. alternative: remove with <- '' */
)
)
set standard_suffix_removed
)
)

// verb_suffix: with the limit temporarily moved to mark pV, finds the
// longest verb ending that ends the word. The table has two groups:
//   - the first (down to the `non-v or 'u'` action) is deleted only when
//     the ending is preceded by a non-vowel or by 'u';
//   - the second is deleted unconditionally.
define verb_suffix as setlimit tomark pV for (
[substring] among(
// 'long' infinitive:
'are' 'ere' 'ire' '{a^}re'

// gerund:
'ind' '{a^}nd'
'indu' '{a^}ndu'

'eze'
'easc{a+}'
// present:
'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
'e{s,}te'
'{a+}sc' '{a+}{s,}ti'
'{a+}{s,}te'

// imperfect:
'am' 'ai' 'au'
'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
'iam' 'iai' 'ia' 'ia{t,}i' 'iau'

// past: // (not 'ii')
'ui'
'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
'{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'

// pluperfect:
'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
'{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
'{a^}ser{a+}'
'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'

( non-v or 'u' delete )

// present:
'{a+}m' 'a{t,}i'
'em' 'e{t,}i'
'im' 'i{t,}i'
'{a^}m' '{a^}{t,}i'

// past:
'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
'sei' 'se'

// pluperfect:
'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
(delete)
)
)

// vowel_suffix: deletes a final a, e, i, ie or a-breve when it lies in RV.
define vowel_suffix as (
[substring] RV among (
'a' 'e' 'i' 'ie' '{a+}' ( delete )
)
)
)

// stem: the externally visible entry point. Every step is wrapped in `do`.
// Inside `backwards`, verb_suffix runs only when standard_suffix left the
// standard_suffix_removed flag unset; vowel_suffix always runs last.
define stem as (
do prelude
do mark_regions
backwards (
do step_0
do standard_suffix
do ( standard_suffix_removed or verb_suffix )
do vowel_suffix
)
do postlude
)


+ 217
- 0
contrib/snowball/algorithms/russian/stem_KOI8_R.sbl View File

@@ -0,0 +1,217 @@
stringescapes {}

/* the 32 Cyrillic letters in the KOI8-R coding scheme, and represented
in Latin characters following the conventions of the standard Library
of Congress transliteration: */

stringdef a hex 'C1'
stringdef b hex 'C2'
stringdef v hex 'D7'
stringdef g hex 'C7'
stringdef d hex 'C4'
stringdef e hex 'C5'
stringdef zh hex 'D6'
stringdef z hex 'DA'
stringdef i hex 'C9'
stringdef i` hex 'CA'
stringdef k hex 'CB'
stringdef l hex 'CC'
stringdef m hex 'CD'
stringdef n hex 'CE'
stringdef o hex 'CF'
stringdef p hex 'D0'
stringdef r hex 'D2'
stringdef s hex 'D3'
stringdef t hex 'D4'
stringdef u hex 'D5'
stringdef f hex 'C6'
stringdef kh hex 'C8'
stringdef ts hex 'C3'
stringdef ch hex 'DE'
stringdef sh hex 'DB'
stringdef shch hex 'DD'
stringdef " hex 'DF'
stringdef y hex 'D9'
stringdef ' hex 'D8'
stringdef e` hex 'DC'
stringdef iu hex 'C0'
stringdef ia hex 'D1'

routines ( mark_regions R2
perfective_gerund
adjective
adjectival
reflexive
verb
noun
derivational
tidy_up
)

externals ( stem )

integers ( pV p2 )

groupings ( v )

// v: the nine vowels, written with the transliterated stringdefs above.
define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'

// mark_regions: sets pV and p2, both defaulting to the limit.
// pV is the position just after the first vowel. From there the cursor
// moves past the next non-vowel, then past a further vowel and non-vowel,
// and p2 is set at that point. If the word runs out part-way, the marks
// not yet set keep their default.
define mark_regions as (

$pV = limit
$p2 = limit
do (
gopast v setmark pV gopast non-v
gopast v gopast non-v setmark p2
)
)

backwardmode (

// R2: succeeds when the cursor is at or after mark p2.
define R2 as $p2 <= cursor

// perfective_gerund: deletes a perfective gerund ending. The table has two
// groups: the first three endings are deleted only when preceded by {a} or
// {ia}; the remaining ones are deleted unconditionally.
define perfective_gerund as (
[substring] among (
'{v}'
'{v}{sh}{i}'
'{v}{sh}{i}{s}{'}'
('{a}' or '{ia}' delete)
'{i}{v}'
'{i}{v}{sh}{i}'
'{i}{v}{sh}{i}{s}{'}'
'{y}{v}'
'{y}{v}{sh}{i}'
'{y}{v}{sh}{i}{s}{'}'
(delete)
)
)

// adjective: deletes the longest adjectival ending from the table; every
// entry shares the (delete) action at the end.
define adjective as (
[substring] among (
'{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
'{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
'{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
'{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
'{ia}{ia}'
// and -
'{o}{iu}' // - which is somewhat archaic
'{e}{iu}' // - soft form of {o}{iu}
(delete)
)
)

// adjectival: requires an adjective ending (the routine fails if
// `adjective` fails) and then optionally removes a participle ending in
// front of it. As in perfective_gerund, the first group of participle
// endings is deleted only when preceded by {a} or {ia}; the second group
// is deleted unconditionally.
define adjectival as (
adjective

/* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
errors. Removing im, uem, enn creates too many errors.
*/

try (
[substring] among (
'{e}{m}' // present passive participle
'{n}{n}' // adjective from past passive participle
'{v}{sh}' // past active participle
'{iu}{shch}' '{shch}' // present active participle
('{a}' or '{ia}' delete)

//but not '{i}{m}' '{u}{e}{m}' // present passive participle
//or '{e}{n}{n}' // adjective from past passive participle

'{i}{v}{sh}' '{y}{v}{sh}'// past active participle
'{u}{iu}{shch}' // present active participle
(delete)
)
)

)

// reflexive: deletes a final {s}{ia} or {s}{'}; fails if neither is present.
define reflexive as (
[substring] among (
'{s}{ia}'
'{s}{'}'
(delete)
)
)

// verb: removes the longest matching verb ending.
// First group: deleted only when preceded by {a} or {ia}.
// Second group: deleted unconditionally.
define verb as (
[substring] among (
'{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
'{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
'{n}{y}' '{t}{'}' '{e}{sh}{'}'

'{n}{n}{o}'
('{a}' or '{ia}' delete)

'{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
'{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
'{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
'{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
'{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
'{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
(delete)
/* note the short passive participle tests:
'{n}{a}' '{n}' '{n}{o}' '{n}{y}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
*/
)
)

// noun: deletes the longest matching noun ending from the list below;
// fails when none of them matches.
define noun as (
[substring] among (
'{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
'{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
'{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
'{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
'{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
'{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
(delete)
/* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{x}'
omitted - they only occur on 12 words.
*/
)
)

// derivational: deletes a final {o}{s}{t} or {o}{s}{t}{'}, but only when the
// R2 test succeeds for the matched ending.
define derivational as (
[substring] R2 among (
'{o}{s}{t}'
'{o}{s}{t}{'}'
(delete)
)
)

// tidy_up: final clean-up of the stem. Depending on the ending found:
//  - superlative ending: delete it, then require a following {n}{n} and
//    delete one {n} of the pair;
//  - {n}: delete it only when another {n} precedes it;
//  - {'}: delete it.
define tidy_up as (
[substring] among (

'{e}{i`}{sh}'
'{e}{i`}{sh}{e}' // superlative forms
(delete
['{n}'] '{n}' delete
)
'{n}'
('{n}' delete) // e.g. -nno endings
'{'}'
(delete) // with some slight false conflations
)
)
)

// stem: entry point. Computes the regions, then works backwards from the end
// of the word with the limit set at pV, so nothing before pV is examined.
define stem as (

do mark_regions
backwards setlimit tomark pV for (
// either a perfective gerund ending, or an optional reflexive ending
// followed by the first of adjectival / verb / noun that applies
do (
perfective_gerund or
( try reflexive
adjectival or verb or noun
)
)
try([ '{i}' ] delete)
// because noun ending -i{iu} is being treated as verb ending -{iu}

do derivational
do tidy_up
)
)

+ 215
- 0
contrib/snowball/algorithms/russian/stem_Unicode.sbl View File

@@ -0,0 +1,215 @@
stringescapes {}

/* the 32 Cyrillic letters in Unicode */

stringdef a hex '430'
stringdef b hex '431'
stringdef v hex '432'
stringdef g hex '433'
stringdef d hex '434'
stringdef e hex '435'
stringdef zh hex '436'
stringdef z hex '437'
stringdef i hex '438'
stringdef i` hex '439'
stringdef k hex '43A'
stringdef l hex '43B'
stringdef m hex '43C'
stringdef n hex '43D'
stringdef o hex '43E'
stringdef p hex '43F'
stringdef r hex '440'
stringdef s hex '441'
stringdef t hex '442'
stringdef u hex '443'
stringdef f hex '444'
stringdef kh hex '445'
stringdef ts hex '446'
stringdef ch hex '447'
stringdef sh hex '448'
stringdef shch hex '449'
stringdef " hex '44A'
stringdef y hex '44B'
stringdef ' hex '44C'
stringdef e` hex '44D'
stringdef iu hex '44E'
stringdef ia hex '44F'

routines ( mark_regions R2
perfective_gerund
adjective
adjectival
reflexive
verb
noun
derivational
tidy_up
)

externals ( stem )

integers ( pV p2 )

groupings ( v )

define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'

// mark_regions: sets the marks pV and p2 used to restrict suffix removal.
// Both start at limit and keep that value if the scans below fail.
define mark_regions as (

$pV = limit
$p2 = limit
do (
// pV is set just past the first vowel; then skip past the next non-vowel
gopast v setmark pV gopast non-v
// p2 is set past the following vowel and the non-vowel after it
gopast v gopast non-v setmark p2
)
)

backwardmode (

// R2: succeeds when the cursor is at or beyond the mark p2.
define R2 as $p2 <= cursor

// perfective_gerund: removes the longest matching perfective gerund ending.
// Endings of the first group are deleted only when preceded by {a} or {ia};
// endings of the second group are deleted unconditionally.
define perfective_gerund as (
[substring] among (
'{v}'
'{v}{sh}{i}'
'{v}{sh}{i}{s}{'}'
('{a}' or '{ia}' delete)
'{i}{v}'
'{i}{v}{sh}{i}'
'{i}{v}{sh}{i}{s}{'}'
'{y}{v}'
'{y}{v}{sh}{i}'
'{y}{v}{sh}{i}{s}{'}'
(delete)
)
)

// adjective: deletes the longest matching ending from the list below;
// fails when none of them matches.
define adjective as (
[substring] among (
'{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
'{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
'{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
'{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
'{ia}{ia}'
// and -
'{o}{iu}' // - which is somewhat archaic
'{e}{iu}' // - soft form of {o}{iu}
(delete)
)
)

// adjectival: requires and removes an adjective ending, then optionally
// (try) removes a participle ending in front of it. Participle endings of
// the first group need a preceding {a} or {ia}; the second group does not.
define adjectival as (
adjective

/* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
errors. Removing im, uem, enn creates too many errors.
*/

try (
[substring] among (
'{e}{m}' // present passive participle
'{n}{n}' // adjective from past passive participle
'{v}{sh}' // past active participle
'{iu}{shch}' '{shch}' // present active participle
('{a}' or '{ia}' delete)

//but not '{i}{m}' '{u}{e}{m}' // present passive participle
//or '{e}{n}{n}' // adjective from past passive participle

'{i}{v}{sh}' '{y}{v}{sh}'// past active participle
'{u}{iu}{shch}' // present active participle
(delete)
)
)

)

// reflexive: deletes a final {s}{ia} or {s}{'}; fails if neither is present.
define reflexive as (
[substring] among (
'{s}{ia}'
'{s}{'}'
(delete)
)
)

// verb: removes the longest matching verb ending.
// First group: deleted only when preceded by {a} or {ia}.
// Second group: deleted unconditionally.
define verb as (
[substring] among (
'{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
'{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
'{n}{y}' '{t}{'}' '{e}{sh}{'}'

'{n}{n}{o}'
('{a}' or '{ia}' delete)

'{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
'{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
'{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
'{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
'{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
'{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
(delete)
/* note the short passive participle tests:
'{n}{a}' '{n}' '{n}{o}' '{n}{y}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
*/
)
)

// noun: deletes the longest matching noun ending from the list below;
// fails when none of them matches.
define noun as (
[substring] among (
'{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
'{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
'{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
'{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
'{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
'{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
(delete)
/* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{x}'
omitted - they only occur on 12 words.
*/
)
)

// derivational: deletes a final {o}{s}{t} or {o}{s}{t}{'}, but only when the
// R2 test succeeds for the matched ending.
define derivational as (
[substring] R2 among (
'{o}{s}{t}'
'{o}{s}{t}{'}'
(delete)
)
)

// tidy_up: final clean-up of the stem. Depending on the ending found:
//  - superlative ending: delete it, then require a following {n}{n} and
//    delete one {n} of the pair;
//  - {n}: delete it only when another {n} precedes it;
//  - {'}: delete it.
define tidy_up as (
[substring] among (

'{e}{i`}{sh}'
'{e}{i`}{sh}{e}' // superlative forms
(delete
['{n}'] '{n}' delete
)
'{n}'
('{n}' delete) // e.g. -nno endings
'{'}'
(delete) // with some slight false conflations
)
)
)

// stem: entry point. Computes the regions, then works backwards from the end
// of the word with the limit set at pV, so nothing before pV is examined.
define stem as (

do mark_regions
backwards setlimit tomark pV for (
// either a perfective gerund ending, or an optional reflexive ending
// followed by the first of adjectival / verb / noun that applies
do (
perfective_gerund or
( try reflexive
adjectival or verb or noun
)
)
try([ '{i}' ] delete)
// because noun ending -i{iu} is being treated as verb ending -{iu}

do derivational
do tidy_up
)
)

+ 230
- 0
contrib/snowball/algorithms/spanish/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,230 @@
routines (
postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
y_verb_suffix
verb_suffix
residual_suffix
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a' hex 'E1' // a-acute
stringdef e' hex 'E9' // e-acute
stringdef i' hex 'ED' // i-acute
stringdef o' hex 'F3' // o-acute
stringdef u' hex 'FA' // u-acute
stringdef u" hex 'FC' // u-diaeresis
stringdef n~ hex 'F1' // n-tilde

define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'

// mark_regions: sets the marks pV, p1 and p2 that back the RV, R1 and R2
// tests. All three default to limit if the scans below fail.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

// pV depends on the first two letters:
//  vowel, non-vowel     -> after the next vowel
//  vowel, vowel         -> after the next non-vowel
//  non-vowel, non-vowel -> after the next vowel
//  non-vowel, vowel     -> after the third letter
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
// p1: after the first non-vowel that follows a vowel;
// p2: the same rule applied again starting from p1
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// postlude: walks forward through the word replacing each acute-accented
// vowel by its unaccented form; any other character is stepped over by the
// empty-string case. The repeat ends when nothing more can be consumed.
define postlude as repeat (
[substring] among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
'{i'}' (<- 'i')
'{o'}' (<- 'o')
'{u'}' (<- 'u')
// and possibly {u"}->u here, or in prelude
'' (next)
) //or next
)

backwardmode (

// Region tests: each succeeds when the cursor is at or beyond the
// corresponding mark set by mark_regions.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// attached_pronoun: removes a trailing pronoun, but only when it follows one
// of the gerund/infinitive endings listed in the second among and that
// ending lies in RV.
//  - accented ending: the slice is extended over the ending and replaced by
//    the unaccented ending (pronoun removed, accent dropped);
//  - unaccented ending: only the pronoun is deleted;
//  - 'yendo': the pronoun is deleted only when 'u' precedes the ending.
define attached_pronoun as (
[substring] among(
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
'las' 'les' 'los' 'nos'
)
substring RV among(
'i{e'}ndo' (] <- 'iendo')
'{a'}ndo' (] <- 'ando')
'{a'}r' (] <- 'ar')
'{e'}r' (] <- 'er')
'{i'}r' (] <- 'ir')
'ando'
'iendo'
'ar' 'er' 'ir'
(delete)
'yendo' ('u' delete)
)
)

// standard_suffix: finds the longest matching suffix and runs the bracketed
// action that follows its group. Every action begins with a region test
// (R2, or R1 for 'amente') and fails if the suffix is not in that region.
// Several groups then optionally (try) strip a further suffix that is
// exposed by the first removal.
define standard_suffix as (
[substring] among(

'anza' 'anzas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'able' 'ables'
'ible' 'ibles'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amiento' 'amientos'
'imiento' 'imientos'
(
R2 delete
)
'adora' 'ador' 'aci{o'}n'
'adoras' 'adores' 'aciones'
'ante' 'antes' 'ancia' 'ancias'// Note 1
(
R2 delete
try ( ['ic'] R2 delete )
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'encia' 'encias'
(
R2 <- 'ente'
)
'amente'
(
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
R2 delete
try (
[substring] among(
'ante' // Note 1
'able'
'ible' (R2 delete)
)
)
)
'idad'
'idades'
(
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
)
)

// y_verb_suffix: looks for one of the y-initial verb endings with the search
// limited to the text after pV, and deletes it only when 'u' precedes it.
define y_verb_suffix as (
setlimit tomark pV for ([substring]) among(
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
'yas' 'yes' 'yais' 'yamos'
('u' delete)
)
)

// verb_suffix: looks for a verb ending with the search limited to the text
// after pV and deletes it. For the first group ('en' 'es' ...), when the
// ending is preceded by 'gu' the 'u' is included in the deletion.
define verb_suffix as (
setlimit tomark pV for ([substring]) among(

'en' 'es' '{e'}is' 'emos'
(try ('u' test 'g') ] delete)

'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
'ar{e'}'
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
'er{e'}'
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
'ir{e'}'

'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
(delete)
)
)

// residual_suffix: deletes a final vowel-type ending when it lies in RV.
// After removing 'e' or e-acute, a preceding 'u' that itself follows 'g'
// is also deleted if that 'u' is in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
'e' '{e'}'
( RV delete try( ['u'] test 'g' RV delete ) )
)
)
)

// stem: entry point. Marks the regions, then working backwards from the end
// of the word removes an attached pronoun, the first applicable of
// standard / y-verb / verb suffix, and a residual suffix. Finally the
// forward-mode postlude strips acute accents.
define stem as (
do mark_regions
backwards (
do attached_pronoun
do ( standard_suffix or
y_verb_suffix or
verb_suffix
)
do residual_suffix
)
do postlude
)

/*
Note 1: additions of 15 Jun 2005
*/

+ 230
- 0
contrib/snowball/algorithms/spanish/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,230 @@
routines (
postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
y_verb_suffix
verb_suffix
residual_suffix
)

externals ( stem )

integers ( pV p1 p2 )

groupings ( v )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef a' hex 'A0' // a-acute
stringdef e' hex '82' // e-acute
stringdef i' hex 'A1' // i-acute
stringdef o' hex 'A2' // o-acute
stringdef u' hex 'A3' // u-acute
stringdef u" hex '81' // u-diaeresis
stringdef n~ hex 'A4' // n-tilde

define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'

// mark_regions: sets the marks pV, p1 and p2 that back the RV, R1 and R2
// tests. All three default to limit if the scans below fail.
define mark_regions as (

$pV = limit
$p1 = limit
$p2 = limit // defaults

// pV depends on the first two letters:
//  vowel, non-vowel     -> after the next vowel
//  vowel, vowel         -> after the next non-vowel
//  non-vowel, non-vowel -> after the next vowel
//  non-vowel, vowel     -> after the third letter
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
// p1: after the first non-vowel that follows a vowel;
// p2: the same rule applied again starting from p1
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)

// postlude: walks forward through the word replacing each acute-accented
// vowel by its unaccented form; any other character is stepped over by the
// empty-string case. The repeat ends when nothing more can be consumed.
define postlude as repeat (
[substring] among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
'{i'}' (<- 'i')
'{o'}' (<- 'o')
'{u'}' (<- 'u')
// and possibly {u"}->u here, or in prelude
'' (next)
) //or next
)

backwardmode (

// Region tests: each succeeds when the cursor is at or beyond the
// corresponding mark set by mark_regions.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor

// attached_pronoun: removes a trailing pronoun, but only when it follows one
// of the gerund/infinitive endings listed in the second among and that
// ending lies in RV.
//  - accented ending: the slice is extended over the ending and replaced by
//    the unaccented ending (pronoun removed, accent dropped);
//  - unaccented ending: only the pronoun is deleted;
//  - 'yendo': the pronoun is deleted only when 'u' precedes the ending.
define attached_pronoun as (
[substring] among(
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
'las' 'les' 'los' 'nos'
)
substring RV among(
'i{e'}ndo' (] <- 'iendo')
'{a'}ndo' (] <- 'ando')
'{a'}r' (] <- 'ar')
'{e'}r' (] <- 'er')
'{i'}r' (] <- 'ir')
'ando'
'iendo'
'ar' 'er' 'ir'
(delete)
'yendo' ('u' delete)
)
)

// standard_suffix: finds the longest matching suffix and runs the bracketed
// action that follows its group. Every action begins with a region test
// (R2, or R1 for 'amente') and fails if the suffix is not in that region.
// Several groups then optionally (try) strip a further suffix that is
// exposed by the first removal.
define standard_suffix as (
[substring] among(

'anza' 'anzas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'able' 'ables'
'ible' 'ibles'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amiento' 'amientos'
'imiento' 'imientos'
(
R2 delete
)
'adora' 'ador' 'aci{o'}n'
'adoras' 'adores' 'aciones'
'ante' 'antes' 'ancia' 'ancias'// Note 1
(
R2 delete
try ( ['ic'] R2 delete )
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'encia' 'encias'
(
R2 <- 'ente'
)
'amente'
(
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
R2 delete
try (
[substring] among(
'ante' // Note 1
'able'
'ible' (R2 delete)
)
)
)
'idad'
'idades'
(
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
)
)

// y_verb_suffix: looks for one of the y-initial verb endings with the search
// limited to the text after pV, and deletes it only when 'u' precedes it.
define y_verb_suffix as (
setlimit tomark pV for ([substring]) among(
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
'yas' 'yes' 'yais' 'yamos'
('u' delete)
)
)

// verb_suffix: looks for a verb ending with the search limited to the text
// after pV and deletes it. For the first group ('en' 'es' ...), when the
// ending is preceded by 'gu' the 'u' is included in the deletion.
define verb_suffix as (
setlimit tomark pV for ([substring]) among(

'en' 'es' '{e'}is' 'emos'
(try ('u' test 'g') ] delete)

'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
'ar{e'}'
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
'er{e'}'
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
'ir{e'}'

'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
(delete)
)
)

// residual_suffix: deletes a final vowel-type ending when it lies in RV.
// After removing 'e' or e-acute, a preceding 'u' that itself follows 'g'
// is also deleted if that 'u' is in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
'e' '{e'}'
( RV delete try( ['u'] test 'g' RV delete ) )
)
)
)

// stem: entry point. Marks the regions, then working backwards from the end
// of the word removes an attached pronoun, the first applicable of
// standard / y-verb / verb suffix, and a residual suffix. Finally the
// forward-mode postlude strips acute accents.
define stem as (
do mark_regions
backwards (
do attached_pronoun
do ( standard_suffix or
y_verb_suffix or
verb_suffix
)
do residual_suffix
)
do postlude
)

/*
Note 1: additions of 15 Jun 2005
*/

+ 72
- 0
contrib/snowball/algorithms/swedish/stem_ISO_8859_1.sbl View File

@@ -0,0 +1,72 @@
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)

externals ( stem )

integers ( p1 x )

groupings ( v s_ending )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef a" hex 'E4'
stringdef ao hex 'E5'
stringdef o" hex 'F6'

define v 'aeiouy{a"}{ao}{o"}'

define s_ending 'bcdfghjklmnoprtvy'

// mark_regions: sets p1, the start of the region in which suffixes may be
// removed. p1 defaults to limit.
define mark_regions as (

$p1 = limit
// x = position three characters into the word (cursor is restored by test)
test ( hop 3 setmark x )
// p1 = position after the first non-vowel that follows a vowel
goto v gopast non-v setmark p1
// p1 is never allowed to lie before x
try ( $p1 < x $p1 = x )
)

backwardmode (

// main_suffix: searches for the longest listed ending with the search
// limited to the text after p1. Endings of the first group are deleted;
// a final 's' is deleted only when preceded by a letter of s_ending.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(

'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
(delete)
's'
(s_ending delete)
)
)

// consonant_pair: within the text after p1, if the word ends in one of the
// listed letter pairs, delete its last letter (the 'and' re-runs from the
// same cursor position, so [next] marks only the final character).
define consonant_pair as setlimit tomark p1 for (
among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
and ([next] delete)
)

// other_suffix: within the text after p1, deletes 'lig' / 'ig' / 'els',
// and shortens 'l{o"}st' to 'l{o"}s' and 'fullt' to 'full'.
define other_suffix as setlimit tomark p1 for (
[substring] among(
'lig' 'ig' 'els' (delete)
'l{o"}st' (<-'l{o"}s')
'fullt' (<-'full')
)
)
)

// stem: entry point. Marks the region, then working backwards from the end
// of the word applies the three suffix steps in order; each is wrapped in
// 'do', so a step that fails does not stop the following ones.
define stem as (

do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)

+ 72
- 0
contrib/snowball/algorithms/swedish/stem_MS_DOS_Latin_I.sbl View File

@@ -0,0 +1,72 @@
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)

externals ( stem )

integers ( p1 x )

groupings ( v s_ending )

stringescapes {}

/* special characters (in MS-DOS Latin I) */

stringdef a" hex '84'
stringdef ao hex '86'
stringdef o" hex '94'

define v 'aeiouy{a"}{ao}{o"}'

define s_ending 'bcdfghjklmnoprtvy'

// mark_regions: sets p1, the start of the region in which suffixes may be
// removed. p1 defaults to limit.
define mark_regions as (

$p1 = limit
// x = position three characters into the word (cursor is restored by test)
test ( hop 3 setmark x )
// p1 = position after the first non-vowel that follows a vowel
goto v gopast non-v setmark p1
// p1 is never allowed to lie before x
try ( $p1 < x $p1 = x )
)

backwardmode (

// main_suffix: searches for the longest listed ending with the search
// limited to the text after p1. Endings of the first group are deleted;
// a final 's' is deleted only when preceded by a letter of s_ending.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(

'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
(delete)
's'
(s_ending delete)
)
)

// consonant_pair: within the text after p1, if the word ends in one of the
// listed letter pairs, delete its last letter (the 'and' re-runs from the
// same cursor position, so [next] marks only the final character).
define consonant_pair as setlimit tomark p1 for (
among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
and ([next] delete)
)

// other_suffix: within the text after p1, deletes 'lig' / 'ig' / 'els',
// and shortens 'l{o"}st' to 'l{o"}s' and 'fullt' to 'full'.
define other_suffix as setlimit tomark p1 for (
[substring] among(
'lig' 'ig' 'els' (delete)
'l{o"}st' (<-'l{o"}s')
'fullt' (<-'full')
)
)
)

// stem: entry point. Marks the region, then working backwards from the end
// of the word applies the three suffix steps in order; each is wrapped in
// 'do', so a step that fails does not stop the following ones.
define stem as (

do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)

+ 477
- 0
contrib/snowball/algorithms/turkish/stem_Unicode.sbl View File

@@ -0,0 +1,477 @@
/* Stemmer for Turkish
* author: Evren (Kapusuz) Çilden
* email: evren.kapusuz at gmail.com
* version: 1.0 (15.01.2007)

* stems nominal verb suffixes
* stems nominal inflections
* more than one syllable word check
* (y,n,s,U) context check
* vowel harmony check
* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)
* The stemming algorithm is based on the paper "An Affix Stripping
* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
* Eşref Adalı (Proceedings of the IASTED International Conference
* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
* Innsbruck, Austria).
* Turkish is an agglutinative language and has a very rich morphological
* structure. In Turkish, you can form many different words from a single stem
* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
* "You had been the doctor of him". The stem of the word is "doktor" and it
* takes three different suffixes -sU, -ymUs, and -sUnUz. The rules about
* the append order of suffixes can be clearly described as FSMs.
* The paper referenced above defines some FSMs for right to left
* morphological analysis. I generated a method for constructing snowball
* expressions from right to left FSMs for stemming suffixes.
*/

routines (
append_U_to_stems_ending_with_d_or_g // for preventing some overstemmings
check_vowel_harmony // tests vowel harmony for suffixes
is_reserved_word // tests whether current string is a reserved word ('ad','soyad')
mark_cAsInA // nominal verb suffix
mark_DA // noun suffix
mark_DAn // noun suffix
mark_DUr // nominal verb suffix
mark_ki // noun suffix
mark_lAr // noun suffix, nominal verb suffix
mark_lArI // noun suffix
mark_nA // noun suffix
mark_ncA // noun suffix
mark_ndA // noun suffix
mark_ndAn // noun suffix
mark_nU // noun suffix
mark_nUn // noun suffix
mark_nUz // nominal verb suffix
mark_sU // noun suffix
mark_sUn // nominal verb suffix
mark_sUnUz // nominal verb suffix
mark_possessives // -(U)m,-(U)n,-(U)mUz,-(U)nUz,
mark_yA // noun suffix
mark_ylA // noun suffix
mark_yU // noun suffix
mark_yUm // nominal verb suffix
mark_yUz // nominal verb suffix
mark_yDU // nominal verb suffix
mark_yken // nominal verb suffix
mark_ymUs_ // nominal verb suffix
mark_ysA // nominal verb suffix
mark_suffix_with_optional_y_consonant
mark_suffix_with_optional_U_vowel
mark_suffix_with_optional_n_consonant
mark_suffix_with_optional_s_consonant
more_than_one_syllable_word
post_process_last_consonants
postlude

stem_nominal_verb_suffixes
stem_noun_suffixes
stem_suffix_chain_before_ki
)

/* Special characters in Unicode Latin-1 and Latin Extended-A */
stringdef c. hex 'E7' // LATIN SMALL LETTER C WITH CEDILLA
stringdef g~ hex '011F' // LATIN SMALL LETTER G WITH BREVE
stringdef i' hex '0131' // LATIN SMALL LETTER I WITHOUT DOT
stringdef o" hex 'F6' // LATIN SMALL LETTER O WITH DIAERESIS
stringdef s. hex '015F' // LATIN SMALL LETTER S WITH CEDILLA
stringdef u" hex 'FC' // LATIN SMALL LETTER U WITH DIAERESIS

stringescapes { }

integers ( strlen ) // length of a string

booleans ( continue_stemming_noun_suffixes )

groupings ( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)

define vowel 'ae{i'}io{o"}u{u"}'
define U '{i'}iu{u"}'

// the vowel grouping definitions below are used for checking vowel harmony
define vowel1 'a{i'}ou' // vowels that can end with suffixes containing 'a'
define vowel2 'ei{o"}{u"}' // vowels that can end with suffixes containing 'e'
define vowel3 'a{i'}' // vowels that can end with suffixes containing 'i''
define vowel4 'ei' // vowels that can end with suffixes containing 'i'
define vowel5 'ou' // vowels that can end with suffixes containing 'o' or 'u'
define vowel6 '{o"}{u"}' // vowels that can end with suffixes containing 'o"' or 'u"'

externals ( stem )

backwardmode (
// checks vowel harmony for possible suffixes,
// helps to detect whether the candidate for suffix applies to vowel harmony
// this rule is added to prevent over stemming
define check_vowel_harmony as (
// the whole check runs inside test, so the cursor is left where it was
test
(
(goto vowel) // if there is a vowel
// match that vowel, then require that some earlier letter belongs to
// the harmony grouping associated with it
(
('a' goto vowel1) or
('e' goto vowel2) or
('{i'}' goto vowel3) or
('i' goto vowel4) or
('o' goto vowel5) or
('{o"}' goto vowel6) or
('u' goto vowel5) or
('{u"}' goto vowel6)
)
)
)
// if the last consonant before suffix is vowel and n then advance and delete
// if the last consonant before suffix is non vowel and n do nothing
// if the last consonant before suffix is not n then only delete the suffix
// assumption: slice beginning is set correctly
define mark_suffix_with_optional_n_consonant as (
// preceding letter is 'n' and the letter before it is a vowel: step over the 'n'
((test 'n') next (test vowel))
or
// preceding letter is not 'n': succeed without moving, provided the
// letter before the preceding one is a vowel
((not(test 'n')) test(next (test vowel)))

)
// if the last consonant before suffix is vowel and s then advance and delete
// if the last consonant before suffix is non vowel and s do nothing
// if the last consonant before suffix is not s then only delete the suffix
// assumption: slice beginning is set correctly
define mark_suffix_with_optional_s_consonant as (
// preceding letter is 's' and the letter before it is a vowel: step over the 's'
((test 's') next (test vowel))
or
// preceding letter is not 's': succeed without moving, provided the
// letter before the preceding one is a vowel
((not(test 's')) test(next (test vowel)))
)
// if the last consonant before suffix is vowel and y then advance and delete
// if the last consonant before suffix is non vowel and y do nothing
// if the last consonant before suffix is not y then only delete the suffix
// assumption: slice beginning is set correctly
define mark_suffix_with_optional_y_consonant as (
// preceding letter is 'y' and the letter before it is a vowel: step over the 'y'
((test 'y') next (test vowel))
or
// preceding letter is not 'y': succeed without moving, provided the
// letter before the preceding one is a vowel
((not(test 'y')) test(next (test vowel)))
)
// Vowel counterpart of the optional-consonant checks: handles an optional
// U vowel (grouping U) in front of the suffix just matched.
define mark_suffix_with_optional_U_vowel as (
// preceding letter is in U and the letter before it is a non-vowel: step over it
((test U) next (test non-vowel))
or
// preceding letter is not in U: succeed without moving, provided the
// letter before the preceding one is a non-vowel
((not(test U)) test(next (test non-vowel)))

)
// mark_possessives: matches a possessive ending (-m, -n, -mUz, -nUz) and then
// applies the optional-U-vowel check. No vowel harmony check is made here.
define mark_possessives as (
among ('m{i'}z' 'miz' 'muz' 'm{u"}z'
'n{i'}z' 'niz' 'nuz' 'n{u"}z' 'm' 'n')
(mark_suffix_with_optional_U_vowel)
)
// mark_sU: noun suffix -(s)U. Requires vowel harmony, one letter of grouping
// U, then the optional-s-consonant check.
define mark_sU as (
check_vowel_harmony
U
(mark_suffix_with_optional_s_consonant)
)
// mark_lArI: noun suffix -lArI; matches 'leri' or 'lar{i'}' with no separate
// vowel harmony check.
define mark_lArI as (
among ('leri' 'lar{i'}')
)
// mark_yU: noun suffix -(y)U. Requires vowel harmony, one letter of grouping
// U, then the optional-y-consonant check.
define mark_yU as (
check_vowel_harmony
U
(mark_suffix_with_optional_y_consonant)
)
// mark_nU: noun suffix -nU; requires vowel harmony, then one of the four forms.
define mark_nU as (
check_vowel_harmony
among ('n{i'}' 'ni' 'nu' 'n{u"}')
)
// mark_nUn: noun suffix -(n)Un; requires vowel harmony, one of the four
// forms, then the optional-n-consonant check.
define mark_nUn as (
check_vowel_harmony
among ('{i'}n' 'in' 'un' '{u"}n')
(mark_suffix_with_optional_n_consonant)
)
// mark_yA: noun suffix -(y)A; requires vowel harmony, 'a' or 'e', then the
// optional-y-consonant check.
define mark_yA as (
check_vowel_harmony
among('a' 'e')
(mark_suffix_with_optional_y_consonant)
)
// mark_nA: noun suffix -nA; requires vowel harmony, then 'na' or 'ne'.
define mark_nA as (
check_vowel_harmony
among('na' 'ne')
)
// mark_DA: noun suffix -DA (d/t + a/e); requires vowel harmony first.
define mark_DA as (
check_vowel_harmony
among('da' 'de' 'ta' 'te')
)
// mark_ndA: noun suffix -ndA; requires vowel harmony, then 'nda' or 'nde'.
define mark_ndA as (
check_vowel_harmony
among('nda' 'nde')
)
// mark_DAn: noun suffix -DAn (d/t + a/e + n); requires vowel harmony first.
define mark_DAn as (
check_vowel_harmony
among('dan' 'den' 'tan' 'ten')
)
// mark_ndAn: noun suffix -ndAn; requires vowel harmony, then 'ndan' or 'nden'.
define mark_ndAn as (
check_vowel_harmony
among('ndan' 'nden')
)
// mark_ylA: noun suffix -(y)lA; requires vowel harmony, 'la' or 'le', then
// the optional-y-consonant check.
define mark_ylA as (
check_vowel_harmony
among('la' 'le')
(mark_suffix_with_optional_y_consonant)
)
// mark_ki: noun suffix -ki; a plain literal match with no harmony check.
define mark_ki as (
'ki'
)
// mark_ncA: noun suffix -(n)cA; requires vowel harmony, 'ca' or 'ce', then
// the optional-n-consonant check.
define mark_ncA as (
check_vowel_harmony
among('ca' 'ce')
(mark_suffix_with_optional_n_consonant)
)
// mark_yUm: nominal verb suffix -(y)Um; requires vowel harmony, one of the
// four forms, then the optional-y-consonant check.
define mark_yUm as (
check_vowel_harmony
among ('{i'}m' 'im' 'um' '{u"}m')
(mark_suffix_with_optional_y_consonant)
)
// mark_sUn: nominal verb suffix -sUn; requires vowel harmony, then one of
// the four forms.
define mark_sUn as (
check_vowel_harmony
among ('s{i'}n' 'sin' 'sun' 's{u"}n' )
)
// mark_yUz: nominal verb suffix -(y)Uz; requires vowel harmony, one of the
// four forms, then the optional-y-consonant check.
define mark_yUz as (
check_vowel_harmony
among ('{i'}z' 'iz' 'uz' '{u"}z')
(mark_suffix_with_optional_y_consonant)
)
// mark_sUnUz: nominal verb suffix -sUnUz; matches one of the four forms with
// no separate vowel harmony check.
define mark_sUnUz as (
among ('s{i'}n{i'}z' 'siniz' 'sunuz' 's{u"}n{u"}z')
)
// mark_lAr: suffix -lAr (used both as noun and as nominal verb suffix);
// requires vowel harmony, then 'ler' or 'lar'.
define mark_lAr as (
check_vowel_harmony
among ('ler' 'lar')
)
// mark_nUz: nominal verb suffix -nUz; requires vowel harmony, then one of
// the four forms.
define mark_nUz as (
check_vowel_harmony
among ('n{i'}z' 'niz' 'nuz' 'n{u"}z')
)
// mark_DUr: nominal verb suffix -DUr (t/d + U vowel + r); requires vowel
// harmony first.
define mark_DUr as (
check_vowel_harmony
among ('t{i'}r' 'tir' 'tur' 't{u"}r' 'd{i'}r' 'dir' 'dur' 'd{u"}r')
)
// mark_cAsInA: nominal verb suffix -cAsInA; matches either form with no
// separate vowel harmony check.
define mark_cAsInA as (
among ('cas{i'}na' 'cesine')
)
// mark_yDU: nominal verb suffix -(y)DU, alone or followed by m / n / k;
// requires vowel harmony, one of the listed forms, then the
// optional-y-consonant check.
define mark_yDU as (
check_vowel_harmony
among ('t{i'}m' 'tim' 'tum' 't{u"}m' 'd{i'}m' 'dim' 'dum' 'd{u"}m'
't{i'}n' 'tin' 'tun' 't{u"}n' 'd{i'}n' 'din' 'dun' 'd{u"}n'
't{i'}k' 'tik' 'tuk' 't{u"}k' 'd{i'}k' 'dik' 'duk' 'd{u"}k'
't{i'}' 'ti' 'tu' 't{u"}' 'd{i'}' 'di' 'du' 'd{u"}')
(mark_suffix_with_optional_y_consonant)
)

// does not fully obey vowel harmony
// mark_ysA: nominal verb suffix -(y)sA, alone or followed by m / n / k;
// no vowel harmony check is made, then the optional-y-consonant check.
define mark_ysA as (
among ('sam' 'san' 'sak' 'sem' 'sen' 'sek' 'sa' 'se')
(mark_suffix_with_optional_y_consonant)
)
// mark_ymUs_: nominal verb suffix -(y)mUs with s-cedilla; requires vowel
// harmony, one of the four forms, then the optional-y-consonant check.
define mark_ymUs_ as (
check_vowel_harmony
among ('m{i'}{s.}' 'mi{s.}' 'mu{s.}' 'm{u"}{s.}')
(mark_suffix_with_optional_y_consonant)
)
// mark_yken: nominal verb suffix -(y)ken; literal 'ken' followed by the
// optional-y-consonant check. No vowel harmony check is made.
define mark_yken as (
'ken' (mark_suffix_with_optional_y_consonant)
)
// stem_nominal_verb_suffixes: strips a chain of nominal verb suffixes from
// the end of the word. The alternatives joined by 'or' are tried in order;
// the closing ']delete' removes whatever span the successful alternative
// left marked (some alternatives have already deleted an outer suffix and
// re-opened the slice with '[' inside a try).
// The flag continue_stemming_noun_suffixes is set on entry and is unset only
// by the alternative that begins with mark_lAr.
define stem_nominal_verb_suffixes as (
[
set continue_stemming_noun_suffixes
(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
or
(mark_cAsInA (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_)
or
(
mark_lAr ] delete try([(mark_DUr or mark_yDU or mark_ysA or mark_ymUs_))
unset continue_stemming_noun_suffixes
)
or
(mark_nUz (mark_yDU or mark_ysA))
or
((mark_sUnUz or mark_yUz or mark_sUn or mark_yUm) ] delete try([ mark_ymUs_))
or
(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
]delete
)
// stems noun suffix chains ending with -ki
// Requires -ki at the cursor, preceded by one of -DA, -nUn or -ndA, and
// removes the chain. After the -DA and -nUn cases further suffixes are
// optional (try); after -ndA one of the three listed continuations is
// required. The routine calls itself to handle nested -ki chains.
define stem_suffix_chain_before_ki as (
[
mark_ki
(
(mark_DA] delete try([
(mark_lAr] delete try(stem_suffix_chain_before_ki))
or
(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
))
or
(mark_nUn] delete try([
(mark_lArI] delete)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
))
or
(mark_ndA (
(mark_lArI] delete)
or
((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))
or
(stem_suffix_chain_before_ki)
))
)
)
// stem_noun_suffixes: strips a chain of noun suffixes from the end of the
// word. Each top-level alternative (joined by 'or') starts from a different
// outermost suffix and then removes the suffixes that may precede it; the
// alternatives are tried in the order written and the first that succeeds
// is used. Chains ending in -ki are delegated to
// stem_suffix_chain_before_ki.
define stem_noun_suffixes as (
([mark_lAr] delete try(stem_suffix_chain_before_ki))
or
([mark_ncA] delete
try(
([mark_lArI] delete)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
([mark_lAr] delete stem_suffix_chain_before_ki)
)
)
or
([(mark_ndA or mark_nA)
(
(mark_lArI] delete)
or
(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
)
)
or
([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))
or
( [mark_DAn] delete try ([
(
(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(mark_lAr] delete try(stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
))
)
or
([mark_nUn or mark_ylA] delete
try(
([mark_lAr] delete stem_suffix_chain_before_ki)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
stem_suffix_chain_before_ki
)
)
or
([mark_lArI] delete)
or
(stem_suffix_chain_before_ki)
or
([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
)
// post_process_last_consonants: replaces a final b, c, d or g-breve of the
// stem by p, c-cedilla, t or k respectively; fails if the stem ends in
// none of these.
define post_process_last_consonants as (
[substring] among (
'b' (<- 'p')
'c' (<- '{c.}')
'd' (<- 't')
'{g~}' (<- 'k')
)
)

// after stemming if the word ends with 'd' or 'g' most probably last U is overstemmed
// like in 'kedim' -> 'ked'
// Turkish words don't usually end with 'd' or 'g'
// some very well known words are ignored (like 'ad' 'soyad')
// appends U to stems ending with d or g, decides which vowel to add
// based on the last vowel in the stem
define append_U_to_stems_ending_with_d_or_g as (
// the stem must end in 'd' or 'g' (test leaves the cursor at the end)
test('d' or 'g')
// each alternative inspects the nearest vowel of the stem inside a test
// and, if it is one of the two named vowels, inserts the U vowel shown
(test((goto vowel) 'a' or '{i'}') <+ '{i'}')
or
(test((goto vowel) 'e' or 'i') <+ 'i')
or
(test((goto vowel) 'o' or 'u') <+ 'u')
or
(test((goto vowel) '{o"}' or '{u"}') <+ '{u"}')
)
)

// Tests if there are more than one syllables
// In Turkish each vowel indicates a distinct syllable
define more_than_one_syllable_word as (
// succeeds if at least two vowels can be passed; test restores the cursor
test (atleast 2 (gopast vowel))
)

// is_reserved_word: succeeds when the word contains 'ad' and limit equals 2,
// or contains 'soyad' and limit equals 5. Both checks run inside test, so
// the cursor is not moved. strlen is used only as a scratch integer.
define is_reserved_word as (
test(gopast 'ad' ($strlen = 2) ($strlen == limit))
or
test(gopast 'soyad' ($strlen = 5) ($strlen == limit))
)

// postlude: fails (changing nothing) for a reserved word; otherwise, working
// backwards from the end of the stem, optionally appends a U vowel and
// then optionally converts the final consonant.
define postlude as (
not(is_reserved_word)
backwards (
do append_U_to_stems_ending_with_d_or_g
do post_process_last_consonants
)
)

// stem: entry point. Words that do not pass the more-than-one-syllable test
// are left untouched. Otherwise nominal verb suffixes are stripped first;
// the bare flag name is a test, so when the flag has been unset the
// backwards block fails at that point and neither noun-suffix stemming
// nor postlude runs.
define stem as (
(more_than_one_syllable_word)
(
backwards (
do stem_nominal_verb_suffixes
continue_stemming_noun_suffixes
do stem_noun_suffixes
)
postlude
)
)



+ 959
- 0
contrib/snowball/compiler/analyser.c View File

@@ -0,0 +1,959 @@

#include <stdio.h> /* printf etc */
#include <stdlib.h> /* exit */
#include <string.h> /* memmove */
#include "header.h"

/* recursive usage: */

static void read_program_(struct analyser * a, int terminator);
static struct node * read_C(struct analyser * a);
static struct node * C_style(struct analyser * a, char * s, int token);


/* Report an internal inconsistency, identified by code n, on stderr and
 * terminate the process with exit status 1. */
static void fault(int n) {
    fprintf(stderr, "fault %d\n", n);
    exit(1);
}

/* Print the tree rooted at p to stdout, one node per line.
 *
 * p - node to print (must not be null)
 * n - nesting depth of p, used for the indent
 * s - marker written as the last indent item, showing how p hangs off its
 *     parent
 *
 * The AE, left and aux subtrees are printed one level deeper; the right
 * sibling is printed at the same depth as p. */
static void print_node_(struct node * p, int n, const char * s) {
    int depth = 0;

    /* indent: a filler for every level except the last, which gets s */
    while (depth < n) {
        if (depth == n - 1) fputs(s, stdout); else fputs(" ", stdout);
        depth++;
    }

    printf("%s ", name_of_token(p->type));
    if (p->name != 0) report_b(stdout, p->name->b);
    if (p->literalstring != 0) {
        printf("'");
        report_b(stdout, p->literalstring);
        printf("'");
    }
    printf("\n");

    if (p->AE != 0) print_node_(p->AE, n + 1, "# ");
    if (p->left != 0) print_node_(p->left, n + 1, " ");
    if (p->right != 0) print_node_(p->right, n, " ");
    if (p->aux != 0) print_node_(p->aux, n + 1, "@ ");
}

/* Dump the whole parsed program held in a->program to stdout. */
extern void print_program(struct analyser * a) {
    struct node * root = a->program;
    print_node_(root, 0, " ");
}

static struct node * new_node(struct analyser * a, int type) {
NEW(node, p);
p->next = a->nodes; a->nodes = p;
p->left = 0;
p->right = 0;
p->aux = 0;
p->AE = 0;
p->name = 0;
p->literalstring = 0;
p->mode = a->mode;
p->line_number = a->tokeniser->line_number;
p->type = type;
return p;
}

/* Return the text used in diagnostics for mode n (m_forward or m_backward).
 * Any other value is reported as internal fault 0. */
static const char * name_of_mode(int n) {
    if (n == m_forward) return "string forward";
    /* anything that is not one of the two known modes is an internal error */
    if (n != m_backward) fault(0);
    return "string backward";
}

/* Return the text used in diagnostics for the type code n.
 *
 * The codes are the single-character codes accepted by check_name_type():
 * 's', 'i', 'b', 'r', 'R' and 'g'. The 'b' (boolean) code is included
 * because check_name_type() forwards the requested code to error 33, which
 * prints it through this function; without it, a boolean type mismatch
 * ended in internal fault 1 instead of a diagnostic.
 *
 * Any other code is reported as internal fault 1. */
static const char * name_of_type(int n) {
    switch (n) {
        case 's': return "string";
        case 'i': return "integer";
        case 'b': return "boolean";
        case 'r': return "routine";
        case 'R': return "routine or grouping";
        case 'g': return "grouping";
        default: break;
    }
    fault(1);
    return "string"; /* only reached if fault() were to return */
}

/* Count one more reported error. Once 20 errors have already been counted,
 * print "... etc" and terminate with exit status 1 instead. */
static void count_error(struct analyser * a) {
    struct tokeniser * tok = a->tokeniser;

    if (tok->error_count < 20) {
        tok->error_count++;
        return;
    }
    fprintf(stderr, "... etc\n");
    exit(1);
}

/* Report error number n on stderr, prefixed with "file:line: ".
 *
 * a - analyser whose tokeniser supplies the file name, line number and
 *     current token
 * n - error code selecting the message (see the cases below)
 * x - extra value used by some messages: a line number (14, 20), a mode
 *     (32, 37) or a type code (33)
 *
 * The error is counted first, so this call may terminate the process once
 * the error limit has been reached. */
static void error2(struct analyser * a, int n, int x) {
struct tokeniser * t = a->tokeniser;
count_error(a);
fprintf(stderr, "%s:%d: ", t->file, t->line_number);
/* messages 30 and up start with the tokeniser's current string t->b */
if (n >= 30) report_b(stderr, t->b);
switch (n) {
case 0:
fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
case 3:
fprintf(stderr, "in among(...), ");
/* no break: case 3 continues with the "unexpected ..." text of case 1 */
case 1:
fprintf(stderr, "unexpected %s", name_of_token(t->token));
if (t->token == c_number) fprintf(stderr, " %d", t->number);
if (t->token == c_name) {
fprintf(stderr, " ");
report_b(stderr, t->b);
} break;
case 2:
fprintf(stderr, "string omitted"); break;

case 14:
fprintf(stderr, "unresolved substring on line %d", x); break;
case 15:
fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(t->token)); break;
case 16:
fprintf(stderr, "empty grouping"); break;
case 17:
fprintf(stderr, "backwards used when already in this mode"); break;
case 18:
fprintf(stderr, "empty among(...)"); break;
case 19:
fprintf(stderr, "two adjacent bracketed expressions in among(...)"); break;
case 20:
fprintf(stderr, "substring preceded by another substring on line %d", x); break;

case 30:
fprintf(stderr, " re-declared"); break;
case 31:
fprintf(stderr, " undeclared"); break;
case 32:
fprintf(stderr, " declared as %s mode; used as %s mode",
name_of_mode(a->mode), name_of_mode(x)); break;
case 33:
fprintf(stderr, " not of type %s", name_of_type(x)); break;
case 34:
fprintf(stderr, " not of type string or integer"); break;
case 35:
fprintf(stderr, " misplaced"); break;
case 36:
fprintf(stderr, " redefined"); break;
case 37:
fprintf(stderr, " mis-used as %s mode",
name_of_mode(x)); break;
default:
fprintf(stderr, " error %d", n); break;

}
/* codes up to 13 also name the previous token, when there is one */
if (n <= 13 && t->previous_token > 0)
fprintf(stderr, " after %s", name_of_token(t->previous_token));
fprintf(stderr, "\n");
}

/* Report error number n when there is no extra argument to pass on. */
static void error(struct analyser * a, int n) {
    error2(a, n, 0);
}

/* Report that the among(...) at node p contains the string b more than
   once.  The line number printed is the one recorded in p. */
static void error3(struct analyser * a, struct node * p, symbol * b) {
    struct tokeniser * tok = a->tokeniser;
    count_error(a);
    fprintf(stderr, "%s:%d: among(...) has repeated string '", tok->file, p->line_number);
    report_b(stderr, b);
    fprintf(stderr, "'\n");
}

static void error4(struct analyser * a, struct name * q) {
count_error(a);
report_b(stderr, q->b);
fprintf(stderr, " undefined\n");
}

static void omission_error(struct analyser * a, int n) {
a->tokeniser->omission = n;
error(a, 0);
}

/* Return true if the current token is 'code'; otherwise report it as
   omitted and return false. */
static int check_token(struct analyser * a, int code) {
    if (a->tokeniser->token == code) return true;
    omission_error(a, code);
    return false;
}

static int get_token(struct analyser * a, int code) {
struct tokeniser * t = a->tokeniser;
read_token(t);
{
int x = check_token(a, code);
unless (x) t->token_held = true;
return x;
}
}

static struct name * look_for_name(struct analyser * a) {
struct name * p = a->names;
symbol * q = a->tokeniser->b;
repeat {
if (p == 0) return 0;
{ symbol * b = p->b;
int n = SIZE(b);
if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
p->referenced = true;
return p;
}
}
p = p->next;
}
}

/* Like look_for_name(), but reports error 31 (undeclared) when the name is
   not found.  Still returns 0 in that case. */
static struct name * find_name(struct analyser * a) {
    struct name * found = look_for_name(a);
    if (found == 0) {
        error(a, 31);
    }
    return found;
}

/* Record 'mode' as the mode of routine p if it has none yet (mode < 0);
   otherwise report error 37 when the recorded mode differs. */
static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
    if (p->mode < 0) {
        p->mode = mode;
        return;
    }
    if (p->mode != mode) error2(a, 37, mode);
}

static void check_name_type(struct analyser * a, struct name * p, int type) {
switch (type) {
case 's': if (p->type == t_string) return; break;
case 'i': if (p->type == t_integer) return; break;
case 'b': if (p->type == t_boolean) return; break;
case 'R': if (p->type == t_grouping) return;
case 'r': if (p->type == t_routine ||
p->type == t_external) return; break;
case 'g': if (p->type == t_grouping) return; break;
}
error2(a, 33, type);
}

static void read_names(struct analyser * a, int type) {
struct tokeniser * t = a->tokeniser;
unless (get_token(a, c_bra)) return;
repeat {
if (read_token(t) != c_name) break;
if (look_for_name(a) != 0) error(a, 30); else {
NEW(name, p);
p->b = copy_b(t->b);
p->type = type;
p->mode = -1; /* routines, externals */
p->count = a->name_count[type];
p->referenced = false;
p->used = false;
p->grouping = 0;
p->definition = 0;
a->name_count[type] ++;
p->next = a->names;
a->names = p;
}
}
unless (check_token(a, c_ket)) t->token_held = true;
}

static symbol * new_literalstring(struct analyser * a) {
NEW(literalstring, p);
p->b = copy_b(a->tokeniser->b);
p->next = a->literalstrings;
a->literalstrings = p;
return p->b;
}

static int read_AE_test(struct analyser * a) {

struct tokeniser * t = a->tokeniser;
switch (read_token(t)) {
case c_assign: return c_mathassign;
case c_plusassign:
case c_minusassign:
case c_multiplyassign:
case c_divideassign:
case c_eq:
case c_ne:
case c_gr:
case c_ge:
case c_ls:
case c_le: return t->token;
default: error(a, 1); t->token_held = true; return c_eq;
}
}

/* Binding strength of a dyadic arithmetic operator: 1 for + and -, 2 for
   * and /, and -2 for any token that is not such an operator. */
static int binding(int t) {
    if (t == c_plus || t == c_minus) return 1;
    if (t == c_multiply || t == c_divide) return 2;
    return -2;
}

static void name_to_node(struct analyser * a, struct node * p, int type) {
struct name * q = find_name(a);
unless (q == 0) {
check_name_type(a, q, type);
q->used = true;
}
p->name = q;
}

/* Read an arithmetic expression.  Only operators whose binding strength is
   greater than B are consumed at this level, so a call with B = 0 reads a
   complete expression.  Returns the expression tree, or 0 after reporting
   error 1 when the first token cannot start an operand. */
static struct node * read_AE(struct analyser * a, int B) {
    struct tokeniser * tok = a->tokeniser;
    struct node * operand;

    /* first an operand ... */
    switch (read_token(tok)) {
        case c_minus: /* monadic */
            operand = new_node(a, c_neg);
            operand->right = read_AE(a, 100);
            break;
        case c_bra:
            operand = read_AE(a, 0);
            get_token(a, c_ket);
            break;
        case c_name:
            operand = new_node(a, c_name);
            name_to_node(a, operand, 'i');
            break;
        case c_maxint:
        case c_minint:
        case c_cursor:
        case c_limit:
        case c_size:
            operand = new_node(a, tok->token);
            break;
        case c_number:
            operand = new_node(a, c_number);
            operand->number = tok->number;
            break;
        case c_sizeof:
            operand = C_style(a, "s", c_sizeof);
            break;
        default:
            error(a, 1);
            tok->token_held = true;
            return 0;
    }

    /* ... then any operators that bind more tightly than B */
    for (;;) {
        int op = read_token(tok);
        int strength = binding(op);
        struct node * parent;
        if (strength <= B) {
            tok->token_held = true;
            return operand;
        }
        parent = new_node(a, op);
        parent->left = operand;
        parent->right = read_AE(a, strength);
        operand = parent;
    }
}

/* Read the rest of a chain 'q op C op C ...', where op is 'and' or 'or'
   and the first operator has already been read.  Returns a node of type op
   whose ->left is the first command of the chain; the commands are linked
   through ->right.  The first token after the chain is held.

   read_C() returns 0 after reporting an unexpected token, and q itself may
   be 0 for the same reason.  Such commands are left out of the chain
   rather than dereferenced, so malformed input produces error messages
   instead of a crash. */
static struct node * read_C_connection(struct analyser * a, struct node * q, int op) {
    struct tokeniser * t = a->tokeniser;
    struct node * p = new_node(a, op);
    struct node * p_end = q;
    p->left = q;
    repeat {
        q = read_C(a);
        unless (q == 0) {
            if (p_end == 0) p->left = q; else p_end->right = q;
            p_end = q;
        }
        if (read_token(t) != op) {
            t->token_held = true;
            break;
        }
    }
    return p;
}

static struct node * read_C_list(struct analyser * a) {
struct tokeniser * t = a->tokeniser;
struct node * p = new_node(a, c_bra);
struct node * p_end = 0;
repeat {
int token = read_token(t);
if (token == c_ket) return p;
if (token < 0) { omission_error(a, c_ket); return p; }
t->token_held = true;
{
struct node * q = read_C(a);
repeat {
token = read_token(t);
if (token != c_and && token != c_or) {
t->token_held = true;
break;
}
q = read_C_connection(a, q, token);
}
if (p_end == 0) p->left = q; else p_end->right = q;
p_end = q;
}
}
}

static struct node * C_style(struct analyser * a, char * s, int token) {
int i;
struct node * p = new_node(a, token);
for (i = 0; s[i] != 0; i++) switch(s[i]) {
case 'C':
p->left = read_C(a); continue;
case 'D':
p->aux = read_C(a); continue;
case 'A':
p->AE = read_AE(a, 0); continue;
case 'f':
get_token(a, c_for); continue;
case 'S':
{
int str_token = read_token(a->tokeniser);
if (str_token == c_name) name_to_node(a, p, 's'); else
if (str_token == c_literalstring) p->literalstring = new_literalstring(a);
else error(a, 2);
}
continue;
case 'b':
case 's':
case 'i':
if (get_token(a, c_name)) name_to_node(a, p, s[i]);
continue;
}
return p;
}

static struct node * read_literalstring(struct analyser * a) {
struct node * p = new_node(a, c_literalstring);
p->literalstring = new_literalstring(a);
return p;
}

/* Reverse the symbols of block b in place. */
static void reverse_b(symbol * b) {
    int lo;
    int hi;
    for (lo = 0, hi = SIZE(b) - 1; lo < hi; lo++, hi--) {
        symbol tmp = b[lo];
        b[lo] = b[hi];
        b[hi] = tmp;
    }
}

/* qsort() comparison for amongvec entries.  The strings are compared
   symbol by symbol; when one is a prefix of the other, the shorter one
   sorts first. */
static int compare_amongvec(const void *pv, const void *qv) {
    const struct amongvec * lhs = (const struct amongvec*)pv;
    const struct amongvec * rhs = (const struct amongvec*)qv;
    int common = lhs->size < rhs->size ? lhs->size : rhs->size;
    int i;
    for (i = 0; i < common; i++) {
        int diff = lhs->b[i] - rhs->b[i];
        if (diff != 0) return diff;
    }
    return lhs->size - rhs->size;
}

/* Build the struct among for the among(...) node p and append it to the
   list a->amongs.  'substring' is the pending substring node that goes
   with this among, or 0 if there is none.

   p->left is the list built by read_among(): literal strings (optionally
   with a routine name node in ->left) interleaved with bracketed commands,
   linked through ->right.  p->number is the number of literal strings. */
static void make_among(struct analyser * a, struct node * p, struct node * substring) {

NEW(among, x);
NEWVEC(amongvec, v, p->number);
struct node * q = p->left;
struct amongvec * w0 = v; /* first string not yet given a command */
struct amongvec * w1 = v; /* next free slot in v */
int result = 1;

/* the direction is that of the substring if there is one, else of the among */
int direction = substring != 0 ? substring->mode : p->mode;
int backward = direction == m_backward;

if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
a->amongs_end = x;
x->next = 0;
x->b = v;
x->number = a->among_count++;
x->starter = 0;

/* a bracketed command at the very front is kept apart as the 'starter' */
if (q->type == c_bra) { x->starter = q; q = q->right; }

until (q == 0) {
if (q->type == c_literalstring) {
symbol * b = q->literalstring;
w1->b = b; /* pointer to case string */
w1->p = 0; /* pointer to corresponding case expression */
w1->size = SIZE(b); /* number of characters in string */
w1->i = -1; /* index of longest substring */
w1->result = -1; /* number of corresponding case expression */
w1->function = q->left == 0 ? 0 : q->left->name;
unless (w1->function == 0)
check_routine_mode(a, w1->function, direction);
w1++;
}
else
if (q->left == 0) /* empty command: () */
w0 = w1; /* the strings before it keep p == 0 and result == -1 */
else {
/* give this command to every string read since the previous command */
until (w0 == w1) {
w0->p = q;
w0->result = result;
w0++;
}
result++;
}
q = q->right;
}
/* internal consistency check: the strings found must match the count in p */
unless (w1-v == p->number) { fprintf(stderr, "oh! %d %d\n", (int)(w1-v), p->number); exit(1); }
/* in backward mode the strings are reversed while sorting and matching
   below, and put back the right way round afterwards */
if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
qsort(v, w1 - v, sizeof(struct amongvec), compare_amongvec);

/* the following loop is O(n squared) */
for (w0 = w1 - 1; w0 >= v; w0--) {
symbol * b = w0->b;
int size = w0->size;
struct amongvec * w;

/* look back through the sorted entries for a shorter string that is a
   prefix of this one */
for (w = w0 - 1; w >= v; w--) {
if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
w0->i = w - v; /* fill in index of longest substring */
break;
}
}
}
if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);

/* equal strings are adjacent after sorting; report each repeated pair */
for (w0 = v; w0 < w1 - 1; w0++)
if (w0->size == (w0 + 1)->size &&
memcmp(w0->b, (w0 + 1)->b, w0->size * sizeof(symbol)) == 0) error3(a, p, w0->b);

x->literalstring_count = p->number;
x->command_count = result - 1;
p->among = x;

x->substring = substring;
if (substring != 0) substring->among = x;
/* set the analyser's amongvar_needed flag when this among has commands or a starter */
unless (x->command_count == 0 && x->starter == 0) a->amongvar_needed = true;
}

/* Read 'among ( ... )', the keyword having been read already, and return
   its c_among node.  The list under p->left holds literal strings (each
   optionally followed by a routine name, stored in ->left of the string
   node) and bracketed command lists, linked through ->right.

   Any pending substring in a->substring is taken over by this among and
   cleared.  The among table itself is only built, by make_among(), when no
   errors have been counted so far. */
static struct node * read_among(struct analyser * a) {
struct tokeniser * t = a->tokeniser;
struct node * p = new_node(a, c_among);
struct node * p_end = 0;
int previous_token = -1;
struct node * substring = a->substring;

a->substring = 0;
p->number = 0; /* counts the number of literals */
unless (get_token(a, c_bra)) return p;
repeat {
struct node * q;
int token = read_token(t);
switch (token) {
case c_literalstring:
q = read_literalstring(a);
if (read_token(t) == c_name) {
/* a routine name directly after the string */
struct node * r = new_node(a, c_name);
name_to_node(a, r, 'r');
q->left = r;
}
else t->token_held = true;
p->number++; break;
case c_bra:
if (previous_token == c_bra) error(a, 19);
q = read_C_list(a); break;
default:
error(a, 3);
/* deliberate fall through: an unexpected token also ends the among */
case c_ket:
if (p->number == 0) error(a, 18);
if (t->error_count == 0) make_among(a, p, substring);
return p;
}
previous_token = token;
if (p_end == 0) p->left = q; else p_end->right = q;
p_end = q;
}
}

static struct node * read_substring(struct analyser * a) {

struct node * p = new_node(a, c_substring);
if (a->substring != 0) error2(a, 20, a->substring->line_number);
a->substring = p;
return p;
}

static void check_modifyable(struct analyser * a) {
unless (a->modifyable) error(a, 15);
}

/* Read one command and return its syntax tree node.

   The first token selects the form of the command; most forms are handed
   to C_style() with a format string that describes their operands.
   a->mode and a->modifyable are changed temporarily while reading the
   operand of 'backwards', 'reverse' and '$name', and restored afterwards.

   Returns 0, after reporting error 1, when the token cannot start a
   command. */
static struct node * read_C(struct analyser * a) {
    struct tokeniser * t = a->tokeniser;
    int token = read_token(t);
    switch (token) {
        case c_bra:
            return read_C_list(a);
        case c_backwards:
            {
                int mode = a->mode;
                if (a->mode == m_backward) error(a, 17); else a->mode = m_backward;
                {
                    struct node * p = C_style(a, "C", token);
                    a->mode = mode;
                    return p;
                }
            }
        case c_reverse:
            {
                int mode = a->mode;
                int modifyable = a->modifyable;
                a->modifyable = false;
                a->mode = mode == m_forward ? m_backward : m_forward;
                {
                    struct node * p = C_style(a, "C", token);
                    a->mode = mode;
                    a->modifyable = modifyable;
                    return p;
                }
            }
        case c_not:
        case c_try:
        case c_fail:
        case c_test:
        case c_do:
        case c_goto:
        case c_gopast:
        case c_repeat:
            return C_style(a, "C", token);
        case c_loop:
        case c_atleast:
            return C_style(a, "AC", token);
        case c_setmark:
            return C_style(a, "i", token);
        case c_tomark:
        case c_atmark:
        case c_hop:
            return C_style(a, "A", token);
        case c_delete:
            check_modifyable(a);
            /* deliberate fall through: delete takes no operands either */
        case c_next:
        case c_tolimit:
        case c_atlimit:
        case c_leftslice:
        case c_rightslice:
        case c_true:
        case c_false:
        case c_debug:
            return C_style(a, "", token);
        case c_assignto:
        case c_sliceto:
            check_modifyable(a);
            return C_style(a, "s", token);
        case c_assign:
        case c_insert:
        case c_attach:
        case c_slicefrom:
            check_modifyable(a);
            return C_style(a, "S", token);
        case c_setlimit:
            return C_style(a, "CfD", token);
        case c_set:
        case c_unset:
            return C_style(a, "b", token);
        case c_dollar:
            {
                struct node * p;
                /* Only look the name up if a name token was actually read.
                   Otherwise the token buffer holds the text of some
                   earlier token, and looking that up would give a
                   misleading second error or bind an unrelated name. */
                struct name * q = get_token(a, c_name) ? find_name(a) : 0;
                int mode = a->mode;
                int modifyable = a->modifyable;
                switch (q ? q->type : t_string)
                    /* above line was: switch (q->type) - bug #1 fix 7/2/2003 */
                {
                    default: error(a, 34);
                        /* deliberate fall through: carry on as for a string */
                    case t_string:
                        a->mode = m_forward;
                        a->modifyable = true;
                        p = new_node(a, c_dollar);
                        p->left = read_C(a); break;
                    case t_integer:
                        /* a->mode = m_integer; */
                        p = new_node(a, read_AE_test(a));
                        p->AE = read_AE(a, 0); break;
                }
                p->name = q;
                a->mode = mode;
                a->modifyable = modifyable;
                return p;
            }
        case c_name:
            {
                struct name * q = find_name(a);
                struct node * p = new_node(a, c_name);
                unless (q == 0) {
                    q->used = true;
                    switch (q->type) {
                        case t_boolean:
                            p->type = c_booltest; break;
                        case t_integer:
                            error(a, 35); /* integer name misplaced */
                            /* deliberate fall through */
                        case t_string:
                            break;
                        case t_routine:
                        case t_external:
                            p->type = c_call;
                            check_routine_mode(a, q, a->mode);
                            break;
                        case t_grouping:
                            p->type = c_grouping; break;
                    }
                }
                p->name = q;
                return p;
            }
        case c_non:
            {
                struct node * p = new_node(a, token);
                read_token(t);
                if (t->token == c_minus) read_token(t);
                /* check_token() reports the missing name itself, so no
                   second omission_error() call is made here */
                unless (check_token(a, c_name)) return p;
                name_to_node(a, p, 'g');
                return p;
            }
        case c_literalstring:
            return read_literalstring(a);
        case c_among: return read_among(a);
        case c_substring: return read_substring(a);
        default: error(a, 1); return 0;
    }
}

/* Store the next character at p in W[0] and return how many symbols of p
   it occupies: whatever get_utf8() reports when utf8 is set, otherwise 1. */
static int next_symbol(symbol * p, symbol * W, int utf8) {
    int width = 1;
    if (utf8) {
        int ch;
        width = get_utf8(p, & ch);
        W[0] = ch;
    } else {
        W[0] = p[0];
    }
    return width;
}

/* Apply the characters of q to the grouping block p and return p, which
   may have moved.

   With style == c_plus every character of q is appended to p.  With any
   other style (in practice c_minus) every occurrence of each character of
   q is removed from p.  When utf8 is set, q is decoded character by
   character with get_utf8(); otherwise each symbol of q is one character.

   Removal compacts p in a single pass per character.  This also removes
   adjacent duplicates, which the earlier shift-and-advance loop skipped
   (for example 'aa' - 'a' used to leave one 'a' behind). */
static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
    int j = 0;
    symbol W[1];
    int width;
    if (style == c_plus) {
        while (j < SIZE(q)) {
            width = next_symbol(q + j, W, utf8);
            p = add_to_b(p, 1, W);
            j += width;
        }
    } else {
        while (j < SIZE(q)) {
            int src;
            int dst = 0;
            width = next_symbol(q + j, W, utf8);
            for (src = 0; src < SIZE(p); src++) {
                if (p[src] != W[0]) p[dst++] = p[src];
            }
            SIZE(p) = dst;
            j += width;
        }
    }
    return p;
}

/* Read the body of 'define <grouping name> ...': a sequence of literal
   strings and grouping names joined by '+' and '-'.  A new struct grouping
   is created, appended to a->groupings and attached to q.

   A grouping name may only be used here if it already has a definition of
   its own.  A name that is declared but not yet defined, or the grouping
   being defined itself, is reported as undefined rather than being
   dereferenced or appended to while it is being read.  A name of the wrong
   type is reported by check_name_type() and otherwise ignored.

   An unexpected token in operand position is reported with error 1 and
   ends the definition early.  Otherwise the first token that is not '+' or
   '-' after an operand ends it, and that token is held. */
static void read_define_grouping(struct analyser * a, struct name * q) {
    struct tokeniser * t = a->tokeniser;
    int style = c_plus;
    {
        NEW(grouping, p);
        if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
        a->groupings_end = p;
        q->grouping = p;
        p->next = 0;
        p->name = q;
        p->number = q->count;
        p->b = create_b(0);
        repeat {
            switch (read_token(t)) {
                case c_name:
                    {
                        struct name * r = find_name(a);
                        unless (r == 0) {
                            check_name_type(a, r, 'g');
                            if (r->type == t_grouping) {
                                if (r->grouping == 0 || r->grouping == p)
                                    error4(a, r);
                                else
                                    p->b = alter_grouping(p->b, r->grouping->b, style, false);
                            }
                        }
                    }
                    break;
                case c_literalstring:
                    p->b = alter_grouping(p->b, t->b, style, a->utf8);
                    break;
                default: error(a, 1); return;
            }
            switch (read_token(t)) {
                case c_plus:
                case c_minus: style = t->token; break;
                default: goto label0;
            }
        }
    label0:
        {
            int i;
            int max = 0;
            int min = 1<<16; /* larger than any symbol value stored in the block */
            for (i = 0; i < SIZE(p->b); i++) {
                if (p->b[i] > max) max = p->b[i];
                if (p->b[i] < min) min = p->b[i];
            }
            p->largest_ch = max;
            p->smallest_ch = min;
            /* min is unchanged only if the grouping has no characters */
            if (min == 1<<16) error(a, 16);
        }
        t->token_held = true; return;
    }
}

static void read_define_routine(struct analyser * a, struct name * q) {
struct node * p = new_node(a, c_define);
a->amongvar_needed = false;
unless (q == 0) {
check_name_type(a, q, 'R');
if (q->definition != 0) error(a, 36);
if (q->mode < 0) q->mode = a->mode; else
if (q->mode != a->mode) error2(a, 32, q->mode);
}
p->name = q;
if (a->program == 0) a->program = p; else a->program_end->right = p;
a->program_end = p;
get_token(a, c_as);
p->left = read_C(a);
unless (q == 0) q->definition = p->left;

if (a->substring != 0) {
error2(a, 14, a->substring->line_number);
a->substring = 0;
}
p->amongvar_needed = a->amongvar_needed;
}

static void read_define(struct analyser * a) {
unless (get_token(a, c_name)) return;
{
struct name * q = find_name(a);
if (q != 0 && q->type == t_grouping) read_define_grouping(a, q);
else read_define_routine(a, q);
}
}

static void read_backwardmode(struct analyser * a) {
int mode = a->mode;
a->mode = m_backward;
if (get_token(a, c_bra)) {
read_program_(a, c_ket);
check_token(a, c_ket);
}
a->mode = mode;
}

/* Read declarations and definitions until the terminator is reached.
   'terminator' is c_ket when reading the inside of backwardmode ( ... ),
   and -1 (end of input) for the top level. */
static void read_program_(struct analyser * a, int terminator) {
struct tokeniser * t = a->tokeniser;
repeat {
switch (read_token(t)) {
case c_strings: read_names(a, t_string); break;
case c_booleans: read_names(a, t_boolean); break;
case c_integers: read_names(a, t_integer); break;
case c_routines: read_names(a, t_routine); break;
case c_externals: read_names(a, t_external); break;
case c_groupings: read_names(a, t_grouping); break;
case c_define: read_define(a); break;
case c_backwardmode:read_backwardmode(a); break;
case c_ket:
if (terminator == c_ket) return;
/* deliberate fall through: a ')' at the top level is an unexpected token */
default:
error(a, 1); break;
case -1:
/* end of input: an error if a ')' was still expected */
unless (terminator < 0) omission_error(a, c_ket);
return;
}
}
}

extern void read_program(struct analyser * a) {
read_program_(a, -1);
{
struct name * q = a->names;
until (q == 0) {
switch(q->type) {
case t_external: case t_routine:
if (q->used && q->definition == 0) error4(a, q); break;
case t_grouping:
if (q->used && q->grouping == 0) error4(a, q); break;
}
q = q->next;
}
}

if (a->tokeniser->error_count == 0) {
struct name * q = a->names;
int warned = false;
until (q == 0) {
unless (q->referenced) {
unless (warned) {
fprintf(stderr, "Declared but not used:");
warned = true;
}
fprintf(stderr, " "); report_b(stderr, q->b);
}
q = q->next;
}
if (warned) fprintf(stderr, "\n");

q = a->names;
warned = false;
until (q == 0) {
if (! q->used && (q->type == t_routine ||
q->type == t_grouping)) {
unless (warned) {
fprintf(stderr, "Declared and defined but not used:");
warned = true;
}
fprintf(stderr, " "); report_b(stderr, q->b);
}
q = q->next;
}
if (warned) fprintf(stderr, "\n");
}
}

/* Allocate an analyser for tokeniser t.  All lists start empty, the mode
   is forward, modification is allowed, and every per-type name count is
   zero.  Release it with close_analyser(). */
extern struct analyser * create_analyser(struct tokeniser * t) {
    NEW(analyser, a);
    int type;

    a->tokeniser = t;

    /* lists owned by the analyser */
    a->nodes = 0;
    a->names = 0;
    a->literalstrings = 0;
    a->program = 0;
    a->amongs = 0;
    a->groupings = 0;

    /* counters and parsing state */
    a->among_count = 0;
    for (type = 0; type < t_size; type++) {
        a->name_count[type] = 0;
    }
    a->mode = m_forward;
    a->modifyable = true;
    a->substring = 0;
    return a;
}

extern void close_analyser(struct analyser * a) {
{
struct node * q = a->nodes;
until (q == 0) {
struct node * q_next = q->next;
FREE(q);
q = q_next;
}
}
{
struct name * q = a->names;
until (q == 0) {
struct name * q_next = q->next;
lose_b(q->b); FREE(q);
q = q_next;
}
}
{
struct literalstring * q = a->literalstrings;
until (q == 0) {
struct literalstring * q_next = q->next;
lose_b(q->b); FREE(q);
q = q_next;
}
}
{
struct among * q = a->amongs;
until (q == 0) {
struct among * q_next = q->next;
FREE(q->b); FREE(q);
q = q_next;
}
}
{
struct grouping * q = a->groupings;
until (q == 0) {
struct grouping * q_next = q->next;
lose_b(q->b); FREE(q);
q = q_next;
}
}
FREE(a);
}


+ 257
- 0
contrib/snowball/compiler/driver.c View File

@@ -0,0 +1,257 @@
#include <stdio.h> /* for fprintf etc */
#include <stdlib.h> /* for free etc */
#include <string.h> /* for strlen */
#include "header.h"

/* Defaults for the Java-related options; read_options() stores them in
   o->package, o->parent_class_name, o->among_class and o->string_class
   before the command line is examined. */
#define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
#define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
#define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
#define DEFAULT_STRING_CLASS "java.lang.StringBuilder"

/* Return non-zero if the two C strings are identical. */
static int eq(const char * s1, const char * s2) {
    return strcmp(s1, s2) == 0;
}

/* Print the usage message to stderr and exit with status 1; this function
   does not return.  The Java-specific options are left out of the message
   when DISABLE_JAVA is defined. */
static void print_arglist(void) {
fprintf(stderr, "Usage: snowball <file> [options]\n\n"
"options are: [-o[utput] file]\n"
" [-s[yntax]]\n"
#ifndef DISABLE_JAVA
" [-j[ava]]\n"
#endif
" [-c++]\n"
" [-w[idechars]]\n"
" [-u[tf8]]\n"
" [-n[ame] class name]\n"
" [-ep[refix] string]\n"
" [-vp[refix] string]\n"
" [-i[nclude] directory]\n"
" [-r[untime] path to runtime headers]\n"
#ifndef DISABLE_JAVA
" [-p[arentclassname] fully qualified parent class name]\n"
" [-P[ackage] package name for stemmers]\n"
" [-S[tringclass] StringBuffer-compatible class]\n"
" [-a[mongclass] fully qualified name of the Among class]\n"
#endif
);
exit(1);
}

/* Check that argv[i] exists, i.e. that the option just read has its
   argument.  If it does not, complain and print the usage message, which
   exits. */
static void check_lim(int i, int argc) {
    if (i < argc) return;
    fprintf(stderr, "argument list is one short\n");
    print_arglist();
}

/* Open the file named by block b for writing and return the stream.  If
   the file cannot be opened, a message is printed and the program exits
   with status 1. */
static FILE * get_output(symbol * b) {
    char * path = b_to_s(b);
    FILE * stream = fopen(path, "w");
    if (!stream) {
        fprintf(stderr, "Can't open output %s\n", path);
        exit(1);
    }
    free(path);
    return stream;
}

/* Fill in *o from the command line.

   Every option is first set to its default, then argv[2] onwards is
   scanned (argv[1] is the source file, which main() reads itself).  An
   option that takes an argument consumes the following argv entry.  An
   unrecognised option, or a missing argument, prints the usage message and
   exits.

   -w and -u are mutually exclusive: each one switches the other off, so
   the last one given wins.  -j also switches widechars on.

   The long form of -S is accepted both as "-Stringclass", which is what
   the usage message advertises, and as "-stringclass", which is the only
   spelling that used to be accepted. */
static void read_options(struct options * o, int argc, char * argv[]) {
    char * s;
    int i = 2;

    /* set defaults: */

    o->output_file = 0;
    o->syntax_tree = false;
    o->externals_prefix = "";
    o->variables_prefix = 0;
    o->runtime_path = 0;
    o->parent_class_name = DEFAULT_BASE_CLASS;
    o->string_class = DEFAULT_STRING_CLASS;
    o->among_class = DEFAULT_AMONG_CLASS;
    o->package = DEFAULT_PACKAGE;
    o->name = "";
    o->make_lang = LANG_C;
    o->widechars = false;
    o->includes = 0;
    o->includes_end = 0;
    o->utf8 = false;

    /* read options: */

    repeat {
        if (i >= argc) break;
        s = argv[i++];
        {   if (eq(s, "-o") || eq(s, "-output")) {
                check_lim(i, argc);
                o->output_file = argv[i++];
                continue;
            }
            if (eq(s, "-n") || eq(s, "-name")) {
                check_lim(i, argc);
                o->name = argv[i++];
                continue;
            }
#ifndef DISABLE_JAVA
            if (eq(s, "-j") || eq(s, "-java")) {
                o->make_lang = LANG_JAVA;
                o->widechars = true;
                continue;
            }
#endif
            if (eq(s, "-c++")) {
                o->make_lang = LANG_CPLUSPLUS;
                continue;
            }
            if (eq(s, "-w") || eq(s, "-widechars")) {
                o->widechars = true;
                o->utf8 = false;
                continue;
            }
            if (eq(s, "-s") || eq(s, "-syntax")) {
                o->syntax_tree = true;
                continue;
            }
            if (eq(s, "-ep") || eq(s, "-eprefix")) {
                check_lim(i, argc);
                o->externals_prefix = argv[i++];
                continue;
            }
            if (eq(s, "-vp") || eq(s, "-vprefix")) {
                check_lim(i, argc);
                o->variables_prefix = argv[i++];
                continue;
            }
            if (eq(s, "-i") || eq(s, "-include")) {
                check_lim(i, argc);

                {
                    /* append "<directory>/" to the list of include paths */
                    NEW(include, p);
                    symbol * b = add_s_to_b(0, argv[i++]);
                    b = add_s_to_b(b, "/");
                    p->next = 0; p->b = b;

                    if (o->includes == 0) o->includes = p; else
                        o->includes_end->next = p;
                    o->includes_end = p;
                }
                continue;
            }
            if (eq(s, "-r") || eq(s, "-runtime")) {
                check_lim(i, argc);
                o->runtime_path = argv[i++];
                continue;
            }
            if (eq(s, "-u") || eq(s, "-utf8")) {
                o->utf8 = true;
                o->widechars = false;
                continue;
            }
#ifndef DISABLE_JAVA
            if (eq(s, "-p") || eq(s, "-parentclassname")) {
                check_lim(i, argc);
                o->parent_class_name = argv[i++];
                continue;
            }
            if (eq(s, "-P") || eq(s, "-Package")) {
                check_lim(i, argc);
                o->package = argv[i++];
                continue;
            }
            if (eq(s, "-S") || eq(s, "-Stringclass") || eq(s, "-stringclass")) {
                check_lim(i, argc);
                o->string_class = argv[i++];
                continue;
            }
            if (eq(s, "-a") || eq(s, "-amongclass")) {
                check_lim(i, argc);
                o->among_class = argv[i++];
                continue;
            }
#endif
            fprintf(stderr, "'%s' misplaced\n", s);
            print_arglist();
        }
    }
}

/* Entry point of the Snowball compiler.

   argv[1] is the source file and the remaining arguments are options (see
   print_arglist()).  The program is read and analysed; if any errors were
   counted the process exits with status 1.  Otherwise either the syntax
   tree is printed (-s) or code is generated: <output>.h plus <output>.c
   (or <output>.cc with -c++) for C and C++, or <output>.java for Java. */
extern int main(int argc, char * argv[]) {

NEW(options, o);
if (argc == 1) print_arglist();
read_options(o, argc, argv);
{
symbol * filename = add_s_to_b(0, argv[1]);
char * file;
symbol * u = get_input(filename, &file);
if (u == 0) {
fprintf(stderr, "Can't open input %s\n", argv[1]);
exit(1);
}
{
struct tokeniser * t = create_tokeniser(u, file);
struct analyser * a = create_analyser(t);
t->widechars = o->widechars;
t->includes = o->includes;
a->utf8 = t->utf8 = o->utf8;
read_program(a);
if (t->error_count > 0) exit(1);
if (o->syntax_tree) print_program(a);
close_tokeniser(t);
unless (o->syntax_tree) {
struct generator * g;

char * s = o->output_file;
unless (s) {
fprintf(stderr, "Please include the -o option\n");
print_arglist();
exit(1); /* not reached: print_arglist() exits */
}
if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
symbol * b = add_s_to_b(0, s);
b = add_s_to_b(b, ".h");
o->output_h = get_output(b);
/* reuse the block for the source file name: ".h" -> ".c", or ".cc" for C++ */
b[SIZE(b) - 1] = 'c';
if (o->make_lang == LANG_CPLUSPLUS) {
b = add_s_to_b(b, "c");
}
o->output_c = get_output(b);
lose_b(b);

g = create_generator_c(a, o);
generate_program_c(g);
close_generator_c(g);
fclose(o->output_c);
fclose(o->output_h);
}
#ifndef DISABLE_JAVA
if (o->make_lang == LANG_JAVA) {
symbol * b = add_s_to_b(0, s);
b = add_s_to_b(b, ".java");
o->output_java = get_output(b);
lose_b(b);
g = create_generator_java(a, o);
generate_program_java(g);
close_generator_java(g);
fclose(o->output_java);
}
#endif
}
close_analyser(a);
}
lose_b(u);
lose_b(filename);
}
/* free the list of include directories built by read_options() */
{ struct include * p = o->includes;
until (p == 0)
{ struct include * q = p->next;
lose_b(p->b); FREE(p); p = q;
}
}
FREE(o);
/* report blocks still allocated according to space_count */
unless (space_count == 0) fprintf(stderr, "%d blocks unfreed\n", space_count);
return 0;
}


+ 1465
- 0
contrib/snowball/compiler/generator.c
File diff suppressed because it is too large
View File


+ 1452
- 0
contrib/snowball/compiler/generator_java.c
File diff suppressed because it is too large
View File


+ 324
- 0
contrib/snowball/compiler/header.h View File

@@ -0,0 +1,324 @@

/* Basic types.  'byte' is used for small flags; 'symbol' is the element
   type of the variable-length blocks described in space.c. */
typedef unsigned char byte;
typedef unsigned short symbol;

/* Boolean constants and control-flow spellings used throughout the
   compiler sources:
     repeat { ... }    loops until left with break / return / goto
     unless (C) ...    runs the statement when C is false
     until (C) ...     loops while C is false */
#define true 1
#define false 0
#define repeat while(true)
#define unless(C) if(!(C))
#define until(C) while(!(C))

/* All allocation goes through check_malloc() / check_free(). */
#define MALLOC check_malloc
#define FREE check_free

/* NEW(type, p) declares 'struct type * p' pointing at one freshly
   allocated struct; NEWVEC(type, p, n) does the same for an array of n
   structs.  Both expand to a declaration, so they must appear where a
   declaration is allowed.  The count n is parenthesised so that an
   expression such as 'a + b' is multiplied as a whole. */
#define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
#define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))

#define STARTSIZE 10
/* A block's size and capacity are stored as two ints immediately before
   its first symbol. */
#define SIZE(p) ((int *)(p))[-1]
#define CAPACITY(p) ((int *)(p))[-2]

/* Operations on blocks (growable arrays of symbol); see the description
   at the top of space.c.  Functions that may need to grow a block return
   the block, possibly at a new address, so callers must use the return
   value. */
extern symbol * create_b(int n);
extern void report_b(FILE * out, symbol * p);
extern void lose_b(symbol * p);
extern symbol * increase_capacity(symbol * p, int n);
extern symbol * move_to_b(symbol * p, int n, symbol * q);
extern symbol * add_to_b(symbol * p, int n, symbol * q);
extern symbol * copy_b(symbol * p);
extern char * b_to_s(symbol * p);
extern symbol * add_s_to_b(symbol * p, const char * s);

struct str; /* defined in space.c */

/* Operations on the opaque 'struct str' string type. */
extern struct str * str_new(void);
extern void str_delete(struct str * str);
extern void str_append(struct str * str, struct str * add);
extern void str_append_ch(struct str * str, char add);
extern void str_append_b(struct str * str, symbol * q);
extern void str_append_string(struct str * str, const char * s);
extern void str_append_int(struct str * str, int i);
extern void str_clear(struct str * str);
extern void str_assign(struct str * str, char * s);
extern struct str * str_copy(struct str * old);
extern symbol * str_data(struct str * str);
extern int str_len(struct str * str);
/* UTF-8 helpers.  In analyser.c the int returned by get_utf8() is used as
   the number of symbols consumed, and *slot receives the character. */
extern int get_utf8(const symbol * p, int * slot);
extern int put_utf8(int ch, symbol * p);

/* One name / value pair in the tokeniser's m_pairs list.
   NOTE(review): presumably a string macro definition made in the source
   being compiled - confirm against the tokeniser. */
struct m_pair {

struct m_pair * next;
symbol * name;
symbol * value;

};

/* struct input must be a prefix of struct tokeniser. */
/* That is: the members below are repeated, with the same order and types,
   at the start of struct tokeniser.
   NOTE(review): presumably the saved state of an enclosing input while an
   included file is being read - confirm against the tokeniser. */
struct input {

struct input * next;
symbol * p;
int c;
char * file;
int line_number;

};

/* One directory given with the -i option.  The driver stores the directory
   name with a trailing "/" in b, and chains the entries in command-line
   order. */
struct include {

struct include * next;
symbol * b;

};

/* struct input must be a prefix of struct tokeniser. */
/* The first five members therefore mirror struct input exactly.  The
   remaining members hold the state of the token most recently read, plus
   error bookkeeping shared with the analyser. */
struct tokeniser {

struct input * next;
symbol * p;
int c;
char * file; /* printed as the file name in error messages */
int line_number; /* printed as the line number in error messages */
symbol * b; /* text of the current name or literal string token */
symbol * b2;
int number; /* value of the current c_number token */
int m_start;
int m_end;
struct m_pair * m_pairs;
int get_depth;
int error_count; /* errors reported so far; see count_error() in analyser.c */
int token; /* code of the current token */
int previous_token; /* named in ' after ...' in error messages when > 0 */
byte token_held; /* set by the analyser when it wants the current token re-read */
byte widechars;
byte utf8;

int omission; /* token code named by the 'omitted' error message */
struct include * includes; /* include directories from the command line */

};

/* Tokeniser interface.  The driver treats a 0 return from get_input() as
   failure to open the input.  The analyser treats a negative value from
   read_token() as end of input. */
extern symbol * get_input(symbol * p, char ** p_file);
extern struct tokeniser * create_tokeniser(symbol * b, char * file);
extern int read_token(struct tokeniser * t);
extern const char * name_of_token(int code);
extern void close_tokeniser(struct tokeniser * t);

/* Token and node type codes.  The first group of enumerators comes from
   syswords2.h; the ones listed here are codes used by the analyser for
   tokens and tree nodes in addition to those. */
enum token_codes {

#include "syswords2.h"

c_mathassign,
c_name,
c_number,
c_literalstring,
c_neg,
c_call,
c_grouping,
c_booltest
};

/* Checked allocation, reached through the MALLOC / FREE macros.  The
   driver reports a non-zero space_count at exit as "blocks unfreed". */
extern int space_count;
extern void * check_malloc(int n);
extern void check_free(void * p);

struct node;

/* One declared name.  Names are chained through 'next' from
   analyser.names, most recently declared first. */
struct name {

struct name * next;
symbol * b; /* the text of the name */
int type; /* t_string etc */
int mode; /* )_ for routines, externals */
struct node * definition; /* ) */
int count; /* 0, 1, 2 for each type */
struct grouping * grouping; /* for grouping names */
byte referenced; /* set when the name is looked up in the source */
byte used; /* set when the name is used in a command or expression */

};

/* One literal string copied from the source.  The list is owned by the
   analyser, which frees the blocks in close_analyser(). */
struct literalstring {

struct literalstring * next;
symbol * b;

};

/* One string of an among(...).  make_among() builds an array of these,
   sorted by string. */
struct amongvec {

symbol * b; /* the string giving the case */
int size; /* - and its size */
struct node * p; /* the corresponding command */
int i; /* the amongvec index of the longest substring of b */
int result; /* the numeric result for the case */
struct name * function; /* routine named after the string, or 0 */

};

/* One among(...) of the program; chained in order of appearance from
   analyser.amongs. */
struct among {

struct among * next;
struct amongvec * b; /* pointer to the amongvec */
int number; /* amongs are numbered 0, 1, 2 ... */
int literalstring_count; /* in this among */
int command_count; /* in this among */
struct node * starter; /* i.e. among( (starter) 'string' ... ) */
struct node * substring; /* i.e. substring ... among ( ... ) */
};

/* One grouping definition; chained in order of definition from
   analyser.groupings. */
struct grouping {

struct grouping * next;
int number; /* groupings are numbered 0, 1, 2 ... */
symbol * b; /* the characters of this group */
int largest_ch; /* character with max code */
int smallest_ch; /* character with min code */
byte no_gaps; /* not used in generator.c after 11/5/05 */
struct name * name; /* so g->name->grouping == g */
};

/* One node of the syntax tree.  Which members are meaningful depends on
   'type'. */
struct node {

struct node * next; /* chain of all nodes, followed by close_analyser() to free them */
struct node * left; /* operand, or first item of a list */
struct node * aux; /* used in setlimit */
struct among * among; /* used in among */
struct node * right; /* next item in a list, or right operand of an expression */
int type; /* a token code */
int mode; /* m_forward or m_backward */
struct node * AE; /* arithmetic expression operand */
struct name * name;
symbol * literalstring;
int number;
int line_number; /* source line, used in error messages */
int amongvar_needed; /* used in routine definitions */
};

/* Kinds of declared name.  t_size is the number of kinds and is used as
   the length of analyser.name_count[]. */
enum name_types {

t_size = 6,

t_string = 0, t_boolean = 1, t_integer = 2, t_routine = 3, t_external = 4,
t_grouping = 5

/* If this list is extended, adjust wvn in generator.c */
};

/* In name_count[i] below, remember that
type is
----+----
0 | string
1 | boolean
2 | integer
3 | routine
4 | external
5 | grouping
*/

/* State of the analyser: the syntax tree being built, the tables of names,
   strings, amongs and groupings, and the current parsing context. */
struct analyser {

struct tokeniser * tokeniser;
struct node * nodes; /* all nodes, for freeing */
struct name * names; /* declared names, most recent first */
struct literalstring * literalstrings;
int mode; /* m_forward or m_backward at the current point */
byte modifyable; /* false inside reverse(...) */
struct node * program; /* first routine definition */
struct node * program_end; /* last routine definition */
int name_count[t_size]; /* name_count[i] counts the number of names of type i */
struct among * amongs;
struct among * amongs_end;
int among_count;
int amongvar_needed; /* used in reading routine definitions */
struct grouping * groupings;
struct grouping * groupings_end;
struct node * substring; /* pending 'substring' in current routine definition */
byte utf8; /* copied from the -utf8 option by the driver */
};

/* Direction in which a piece of the program works on the string. */
enum analyser_modes {

m_forward = 0, m_backward /*, m_integer */

};

/* Analyser interface used by the driver: create an analyser, read the
   program into it, optionally print the syntax tree, then close it. */
extern void print_program(struct analyser * a);
extern struct analyser * create_analyser(struct tokeniser * t);
extern void close_analyser(struct analyser * a);

extern void read_program(struct analyser * a);

/* State of a code generator (C or Java) while it writes out the analysed
   program. */
struct generator {

struct analyser * analyser;
struct options * options;
int unreachable; /* 0 if code can be reached, 1 if current code
* is unreachable. */
int var_number; /* Number of next variable to use. */
struct str * outbuf; /* temporary str to store output */
struct str * declarations; /* str storing variable declarations */
int next_label;
int margin;

const char * failure_string; /* String to output in case of a failure. */
#ifndef DISABLE_JAVA
struct str * failure_str; /* This is used by the java generator instead of failure_string */
#endif

int label_used; /* Keep track of whether the failure label is used. */
int failure_label;
int debug_count;

const char * S[10]; /* strings */
symbol * B[10]; /* blocks */
int I[10]; /* integers */
struct name * V[5]; /* variables */
symbol * L[5]; /* literals, used in formatted write */

int line_count; /* counts number of lines output */
int line_labelled; /* in ANSI C, will need extra ';' if it is a block end */
int literalstring_count;
int keep_count; /* used to number keep/restore pairs to avoid compiler warnings
about shadowed variables */
};

/* Command-line options as filled in by read_options() in driver.c, plus
   the output streams opened by main(). */
struct options {

/* for the command line: */

char * output_file; /* -o: output file name without extension, or 0 */
char * name; /* -n */
FILE * output_c;
FILE * output_h;
#ifndef DISABLE_JAVA
FILE * output_java;
#endif
byte syntax_tree; /* -s: print the syntax tree instead of generating code */
byte widechars; /* -w, also set by -j */
enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS } make_lang; /* default LANG_C */
char * externals_prefix; /* -ep */
char * variables_prefix; /* -vp */
char * runtime_path; /* -r */
char * parent_class_name; /* -p */
char * package; /* -P */
char * string_class; /* -S */
char * among_class; /* -a */
struct include * includes; /* -i: list of include directories */
struct include * includes_end;
byte utf8; /* -u */
};

/* Generator for C code. */
/* The driver calls create, generate and close, in that order, and uses
   this generator for both LANG_C and LANG_CPLUSPLUS. */
extern struct generator * create_generator_c(struct analyser * a, struct options * o);
extern void close_generator_c(struct generator * g);

extern void generate_program_c(struct generator * g);

#ifndef DISABLE_JAVA
/* Generator for Java code. */
/* Same calling sequence as the C generator; compiled out when
   DISABLE_JAVA is defined. */
extern struct generator * create_generator_java(struct analyser * a, struct options * o);
extern void close_generator_java(struct generator * g);

extern void generate_program_java(struct generator * g);
#endif

+ 263
- 0
contrib/snowball/compiler/space.c View File

@@ -0,0 +1,263 @@

#include <stdio.h> /* for printf */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memmove */

#include "header.h"

/* Size in bytes of the header kept in front of every block: two ints,
   read through the CAPACITY() and SIZE() macros.  Parenthesised so that
   the macro behaves as a single value inside any expression. */
#define HEAD (2*sizeof(int))
/* Extra symbols added on top of the requested growth in increase_capacity(). */
#define EXTENDER 40


/* This module provides a simple mechanism for arbitrary length writable
strings, called 'blocks'. They are 'symbol *' items rather than 'char *'
items however.

The calls are:

symbol * b = create_b(n);
- create an empty block b with room for n symbols
b = increase_capacity(b, n);
- increase the capacity of block b by n symbols (b may change)
b2 = copy_b(b)
- copy block b into b2
lose_b(b);
- lose block b
b = move_to_b(b, n, p);
- set the data in b to be the n symbols at address p
b = add_to_b(b, n, p);
- add the n symbols at address p to the end of the data in b
SIZE(b)
- is the number of symbols in b
For example:

symbol * b = create_b(0);
{ int i;
char p[10];
for (i = 0; i < 100; i++) {
sprintf(p, " %d", i);
b = add_s_to_b(b, p);
}
}

and b contains " 0 1 2 ... 99" spaced out as symbols.
*/

/* For a block b, SIZE(b) is the number of symbols so far written into it,
CAPACITY(b) the total number it can contain, so SIZE(b) <= CAPACITY(b).
In fact blocks have 1 extra character over the promised capacity so
they can be zero terminated by 'b[SIZE(b)] = 0;' without fear of
overwriting.
*/

extern symbol * create_b(int n) {
symbol * p = (symbol *) (HEAD + (char *) MALLOC(HEAD + (n + 1) * sizeof(symbol)));
CAPACITY(p) = n;
SIZE(p) = 0;
return p;
}

/* Write the SIZE(p) symbols held in block p to 'out', each one printed
   with "%c". */
extern void report_b(FILE * out, symbol * p) {
    int pos = 0;
    while (pos < SIZE(p)) {
        fprintf(out, "%c", p[pos]);
        pos++;
    }
}

/* Release block p. A null pointer is accepted and ignored. */
extern void lose_b(symbol * p) {
    if (p != 0) {
        /* step back over the header to the address that was allocated */
        char * raw = (char *) p - HEAD;
        FREE(raw);
    }
}

/* Replace block p by a larger one: the new capacity is the old capacity
   plus n plus EXTENDER. The contents and SIZE are carried over and the old
   block is released, so the caller must continue with the returned pointer. */
extern symbol * increase_capacity(symbol * p, int n) {
    int old_capacity = CAPACITY(p);
    symbol * bigger = create_b(old_capacity + n + EXTENDER);
    memmove(bigger, p, old_capacity * sizeof(symbol));
    SIZE(bigger) = SIZE(p);
    lose_b(p);
    return bigger;
}

/* Set the contents of block p to the n symbols at q, growing the block first
   if it is too small. Returns the (possibly relocated) block. */
extern symbol * move_to_b(symbol * p, int n, symbol * q) {
    int shortfall = n - CAPACITY(p);
    if (shortfall > 0) {
        p = increase_capacity(p, shortfall);
    }
    memmove(p, q, n * sizeof(symbol));
    SIZE(p) = n;
    return p;
}

/* Append the n symbols at q to the data already in block p, growing the
   block first if needed. Returns the (possibly relocated) block. */
extern symbol * add_to_b(symbol * p, int n, symbol * q) {
    int used = SIZE(p);
    int shortfall = used + n - CAPACITY(p);
    if (shortfall > 0) {
        p = increase_capacity(p, shortfall);  /* keeps SIZE and contents */
    }
    memmove(p + used, q, n * sizeof(symbol));
    SIZE(p) = used + n;
    return p;
}

/* Return a fresh block holding a copy of the SIZE(p) symbols in p. */
extern symbol * copy_b(symbol * p) {
    int count = SIZE(p);
    /* the new block already has room for 'count' symbols, so move_to_b()
       fills it in place and hands the same pointer back */
    return move_to_b(create_b(count), count, p);
}

/* Number of allocations made through check_malloc() that have not yet been
   given back through check_free(). */
int space_count = 0;

/* Allocate n bytes and count the allocation in space_count.
   If the allocation fails the program reports the problem on stderr and exits
   with status 1, instead of returning a null pointer that callers such as
   create_b() would write through. A null result for a zero-byte request is
   passed back unchanged, since malloc(0) is allowed to return null. */
extern void * check_malloc(int n) {
    void * p = malloc(n);
    if (p == 0 && n != 0) {
        fprintf(stderr, "Out of memory (failed to allocate %d bytes)\n", n);
        exit(1);
    }
    space_count++;
    return p;
}

/* Free p and take one allocation off the space_count tally. */
extern void check_free(void * p) {
    free(p);
    --space_count;
}

/* Convert a block to a zero terminated string.
   The result is obtained with malloc() and holds SIZE(p) characters plus the
   terminator. The program exits with status 1 if a symbol is larger than 255
   (it cannot be stored in a char) or if the memory cannot be obtained. */

extern char * b_to_s(symbol * p) {
    int n = SIZE(p);
    int i;
    char * s = (char *) malloc(n + 1);
    if (s == 0) {
        /* previously the null pointer was written through below */
        fprintf(stderr, "In b_to_s, out of memory\n");
        exit(1);
    }
    for (i = 0; i < n; i++) {
        if (p[i] > 255) {
            printf("In b_to_s, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
            exit(1);
        }
        s[i] = (char)p[i];
    }
    s[n] = 0;
    return s;
}

/* To add a zero terminated string to a block. If p = 0 the
   block is created. Returns the (possibly relocated) block. */

extern symbol * add_s_to_b(symbol * p, const char * s) {
    int n = strlen(s);
    int k;
    int i;
    if (p == 0) p = create_b(n);
    k = SIZE(p);
    {
        int x = k + n - CAPACITY(p);
        if (x > 0) p = increase_capacity(p, x);
    }
    /* Convert through unsigned char: where plain char is signed, a byte
       >= 0x80 would otherwise be sign-extended when stored in a symbol wider
       than a char (the 'p[i] > 255' test in b_to_s implies symbols can be
       wider), and b_to_s would then refuse to convert it back. */
    for (i = 0; i < n; i++) p[k + i] = (unsigned char) s[i];
    SIZE(p) += n;
    return p;
}

/* The next section defines string handling capabilities in terms
of the lower level block handling capabilities of space.c */
/* -------------------------------------------------------------*/

/* A growable string: a wrapper around a single block, which the str_*
   functions below create, extend and release. */
struct str {
symbol * data; /* the block holding the contents; reassigned whenever an append returns a new block */
};

/* Create a new, empty string.
   The str itself comes from malloc() and its data from create_b(0); release
   it with str_delete(). Exits with status 1 if the memory cannot be
   obtained. */
extern struct str * str_new(void) {
    struct str * output = (struct str *) malloc(sizeof(struct str));
    if (output == 0) {
        /* previously the null pointer was dereferenced on the next line */
        fprintf(stderr, "In str_new, out of memory\n");
        exit(1);
    }
    output->data = create_b(0);
    return output;
}

/* Delete a string. */
extern void str_delete(struct str * str) {

lose_b(str->data);
free(str);
}

/* Append the contents of 'add' to the end of 'str'. */
extern void str_append(struct str * str, struct str * add) {
    str->data = add_to_b(str->data, SIZE(add->data), add->data);
}

/* Append a character to this str. */
extern void str_append_ch(struct str * str, char add) {
    symbol q[1];
    /* Convert through unsigned char so that, where plain char is signed, a
       byte >= 0x80 is stored as 128..255 rather than sign-extended into a
       symbol wider than a char. */
    q[0] = (unsigned char) add;
    str->data = add_to_b(str->data, 1, q);
}

/* Append all SIZE(q) symbols of the low level block q to the str. */
extern void str_append_b(struct str * str, symbol * q) {
    int count = SIZE(q);
    str->data = add_to_b(str->data, count, q);
}

/* Append a (char *, null teminated) string to a str. */
extern void str_append_string(struct str * str, const char * s) {

str->data = add_s_to_b(str->data, s);
}

/* Append an integer to a str. */
extern void str_append_int(struct str * str, int i) {

char s[30];
sprintf(s, "%d", i);
str_append_string(str, s);
}

/* Clear a string */
extern void str_clear(struct str * str) {

SIZE(str->data) = 0;
}

/* Set a string */
extern void str_assign(struct str * str, char * s) {

str_clear(str);
str_append_string(str, s);
}

/* Copy a string. */
extern struct str * str_copy(struct str * old) {

struct str * newstr = str_new();
str_append(newstr, old);
return newstr;
}

/* Return the block that holds this str's contents (not a copy). */
extern symbol * str_data(struct str * str) {
    symbol * block = str->data;
    return block;
}

/* Get the length of the str. */
extern int str_len(struct str * str) {

return SIZE(str->data);
}

/* Decode one UTF-8 sequence of up to three symbols starting at p.
   The decoded value is stored in *slot and the number of symbols consumed
   (1, 2 or 3) is returned. A leading value below 0xC0 is passed through
   unchanged as a single symbol; no validation of the sequence is done. */
extern int get_utf8(const symbol * p, int * slot) {
    int lead = p[0];
    int second;
    if (lead < 0xC0) { /* 1100 0000 */
        *slot = lead;
        return 1;
    }
    second = p[1];
    if (lead < 0xE0) { /* 1110 0000 */
        *slot = (lead & 0x1F) << 6 | (second & 0x3F);
        return 2;
    }
    *slot = (lead & 0xF) << 12 | (second & 0x3F) << 6 | (p[2] & 0x3F);
    return 3;
}

/* Encode ch as UTF-8 into p[0..], using one symbol for values below 0x80,
   two for values below 0x800 and three otherwise. Returns the number of
   symbols written. */
extern int put_utf8(int ch, symbol * p) {
    int tail = (ch & 0x3F) | 0x80;   /* final continuation byte of a multi-byte form */
    if (ch < 0x80) {
        p[0] = ch;
        return 1;
    }
    if (ch < 0x800) {
        p[0] = (ch >> 6) | 0xC0;
        p[1] = tail;
        return 2;
    }
    p[0] = (ch >> 12) | 0xE0;
    p[1] = ((ch >> 6) & 0x3F) | 0x80;
    p[2] = tail;
    return 3;
}


+ 84
- 0
contrib/snowball/compiler/syswords.h View File

@@ -0,0 +1,84 @@
/* Table of the system words recognised by the tokeniser.
   Entry 0 is a sentinel: its string is empty and its 'code' field holds the
   table size (80+1), which find_word() uses as the upper bound of its binary
   search and name_of_token() as the end of its scan.
   The remaining entries must stay ordered by length first and then by
   character value within one length, because that is the ordering
   compare_words() applies during the binary search. */
static const struct system_word vocab[80+1] = {
{ 0, (const byte *)"", 80+1},

{ 1, (const byte *)"$", c_dollar },
{ 1, (const byte *)"(", c_bra },
{ 1, (const byte *)")", c_ket },
{ 1, (const byte *)"*", c_multiply },
{ 1, (const byte *)"+", c_plus },
{ 1, (const byte *)"-", c_minus },
{ 1, (const byte *)"/", c_divide },
{ 1, (const byte *)"<", c_ls },
{ 1, (const byte *)"=", c_assign },
{ 1, (const byte *)">", c_gr },
{ 1, (const byte *)"?", c_debug },
{ 1, (const byte *)"[", c_leftslice },
{ 1, (const byte *)"]", c_rightslice },
{ 2, (const byte *)"!=", c_ne },
{ 2, (const byte *)"*=", c_multiplyassign },
{ 2, (const byte *)"+=", c_plusassign },
{ 2, (const byte *)"-=", c_minusassign },
{ 2, (const byte *)"->", c_sliceto },
{ 2, (const byte *)"/*", c_comment2 },
{ 2, (const byte *)"//", c_comment1 },
{ 2, (const byte *)"/=", c_divideassign },
{ 2, (const byte *)"<+", c_insert },
{ 2, (const byte *)"<-", c_slicefrom },
{ 2, (const byte *)"<=", c_le },
{ 2, (const byte *)"==", c_eq },
{ 2, (const byte *)"=>", c_assignto },
{ 2, (const byte *)">=", c_ge },
{ 2, (const byte *)"as", c_as },
{ 2, (const byte *)"do", c_do },
{ 2, (const byte *)"or", c_or },
{ 3, (const byte *)"and", c_and },
{ 3, (const byte *)"for", c_for },
{ 3, (const byte *)"get", c_get },
{ 3, (const byte *)"hex", c_hex },
{ 3, (const byte *)"hop", c_hop },
{ 3, (const byte *)"non", c_non },
{ 3, (const byte *)"not", c_not },
{ 3, (const byte *)"set", c_set },
{ 3, (const byte *)"try", c_try },
{ 4, (const byte *)"fail", c_fail },
{ 4, (const byte *)"goto", c_goto },
{ 4, (const byte *)"loop", c_loop },
{ 4, (const byte *)"next", c_next },
{ 4, (const byte *)"size", c_size },
{ 4, (const byte *)"test", c_test },
{ 4, (const byte *)"true", c_true },
{ 5, (const byte *)"among", c_among },
{ 5, (const byte *)"false", c_false },
{ 5, (const byte *)"limit", c_limit },
{ 5, (const byte *)"unset", c_unset },
{ 6, (const byte *)"atmark", c_atmark },
{ 6, (const byte *)"attach", c_attach },
{ 6, (const byte *)"cursor", c_cursor },
{ 6, (const byte *)"define", c_define },
{ 6, (const byte *)"delete", c_delete },
{ 6, (const byte *)"gopast", c_gopast },
{ 6, (const byte *)"insert", c_insert },
{ 6, (const byte *)"maxint", c_maxint },
{ 6, (const byte *)"minint", c_minint },
{ 6, (const byte *)"repeat", c_repeat },
{ 6, (const byte *)"sizeof", c_sizeof },
{ 6, (const byte *)"tomark", c_tomark },
{ 7, (const byte *)"atleast", c_atleast },
{ 7, (const byte *)"atlimit", c_atlimit },
{ 7, (const byte *)"decimal", c_decimal },
{ 7, (const byte *)"reverse", c_reverse },
{ 7, (const byte *)"setmark", c_setmark },
{ 7, (const byte *)"strings", c_strings },
{ 7, (const byte *)"tolimit", c_tolimit },
{ 8, (const byte *)"booleans", c_booleans },
{ 8, (const byte *)"integers", c_integers },
{ 8, (const byte *)"routines", c_routines },
{ 8, (const byte *)"setlimit", c_setlimit },
{ 9, (const byte *)"backwards", c_backwards },
{ 9, (const byte *)"externals", c_externals },
{ 9, (const byte *)"groupings", c_groupings },
{ 9, (const byte *)"stringdef", c_stringdef },
{ 9, (const byte *)"substring", c_substring },
{ 12, (const byte *)"backwardmode", c_backwardmode },
{ 13, (const byte *)"stringescapes", c_stringescapes }
};

+ 13
- 0
contrib/snowball/compiler/syswords2.h View File

@@ -0,0 +1,13 @@
/* Token codes for the system words, in alphabetical order, starting at
   c_among = 4. The same names fill the 'code' column of the vocab table in
   syswords.h.
   NOTE(review): this fragment has no enclosing declaration and ends with a
   trailing comma, so it is presumably #included inside an enum body whose
   later members follow on from it - confirm against header.h. */
c_among = 4, c_and, c_as, c_assign, c_assignto, c_atleast,
c_atlimit, c_atmark, c_attach, c_backwardmode, c_backwards,
c_booleans, c_bra, c_comment1, c_comment2, c_cursor, c_debug,
c_decimal, c_define, c_delete, c_divide, c_divideassign, c_do,
c_dollar, c_eq, c_externals, c_fail, c_false, c_for, c_ge, c_get,
c_gopast, c_goto, c_gr, c_groupings, c_hex, c_hop, c_insert,
c_integers, c_ket, c_le, c_leftslice, c_limit, c_loop, c_ls,
c_maxint, c_minint, c_minus, c_minusassign, c_multiply,
c_multiplyassign, c_ne, c_next, c_non, c_not, c_or, c_plus,
c_plusassign, c_repeat, c_reverse, c_rightslice, c_routines,
c_set, c_setlimit, c_setmark, c_size, c_sizeof, c_slicefrom,
c_sliceto, c_stringdef, c_stringescapes, c_strings, c_substring,
c_test, c_tolimit, c_tomark, c_true, c_try, c_unset,

+ 470
- 0
contrib/snowball/compiler/tokeniser.c View File

@@ -0,0 +1,470 @@

#include <stdio.h> /* stderr etc */
#include <stdlib.h> /* malloc free */
#include <string.h> /* strlen */
#include <ctype.h> /* isalpha etc */
#include "header.h"

/* One entry of the 'vocab' table pulled in from syswords.h below: a reserved
   word or operator together with the token code it maps to. In entry 0 of
   that table the 'code' field holds the table size instead. */
struct system_word {
int s_size; /* size of system word */
const byte * s; /* pointer to the system word */
int code; /* its internal code */
};


/* ASCII collating assumed in syswords.h */

#include "syswords.h"

/* Return the lesser of a and b. */
static int smaller(int a, int b) {
    if (a < b) return a;
    return b;
}

/* Read the whole file named by block p into a new block.
   On success the file name, as a string obtained from b_to_s(), is stored in
   *p_file and the block of file contents is returned. If the file cannot be
   opened the name string is freed, *p_file is left untouched and 0 is
   returned. */
extern symbol * get_input(symbol * p, char ** p_file) {
    char * path = b_to_s(p);
    FILE * input = fopen(path, "r");
    symbol * text;
    int count = 0;
    int ch;

    if (input == 0) {
        free(path);
        return 0;
    }
    *p_file = path;

    text = create_b(STARTSIZE);
    while ((ch = getc(input)) != EOF) {
        /* grow by half of what has been read so far (plus the extender) */
        if (count >= CAPACITY(text)) text = increase_capacity(text, count/2);
        text[count++] = ch;
    }
    fclose(input);
    SIZE(text) = count;
    return text;
}

/* Report a tokeniser error on stderr as "file:line: " followed by s1, the n
   symbols at p and s2; any of the three parts may be omitted by passing 0.
   Each call is counted, and the call made once 20 errors have already been
   reported prints "... etc" and exits with status 1. */
static void error(struct tokeniser * t, char * s1, int n, symbol * p, char * s2) {
    if (t->error_count == 20) {
        fprintf(stderr, "... etc\n");
        exit(1);
    }
    fprintf(stderr, "%s:%d: ", t->file, t->line_number);
    if (s1 != 0) fprintf(stderr, "%s", s1);
    if (p != 0) {
        int k = 0;
        while (k < n) {
            fprintf(stderr, "%c", p[k]);
            k++;
        }
    }
    if (s2 != 0) fprintf(stderr, "%s", s2);
    fprintf(stderr, "\n");
    t->error_count++;
}

static void error1(struct tokeniser * t, char * s) {
error(t, s, 0,0, 0);
}

static void error2(struct tokeniser * t, char * s) {
error(t, "unexpected end of text after ", 0,0, s);
}

/* Order the m symbols at p against the n bytes at q: shorter sorts first,
   and words of equal length are ordered by their first differing character.
   Returns a negative, zero or positive value accordingly. */
static int compare_words(int m, symbol * p, int n, const byte * q) {
    int pos;
    if (m != n) return m - n;
    for (pos = 0; pos < n; pos++) {
        if (p[pos] != q[pos]) return p[pos] - q[pos];
    }
    return 0;
}

/* Binary-search the vocab table for the n symbols at p.
   Returns the matching entry's token code, or -1 if the word is not a system
   word. The search runs between the sentinel entry 0 and the table size held
   in vocab->code. */
static int find_word(int n, symbol * p) {
    int low = 0;
    int high = vocab->code;
    do {
        int mid = low + (high - low)/2;
        const struct system_word * entry = vocab + mid;
        int order = compare_words(n, p, entry->s_size, entry->s);
        if (order == 0) return entry->code;
        if (order < 0) {
            high = mid;
        } else {
            low = mid;
        }
    } while (high - low != 1);
    return -1;
}

/* Convert the n decimal digit symbols at p to an int. The caller is expected
   to pass digits only; no check is made here. */
static int get_number(int n, symbol * p) {
    int value = 0;
    int pos = 0;
    while (pos < n) {
        value = 10*value + p[pos] - '0';
        pos++;
    }
    return value;
}

/* Test whether the input at the current position t->c starts with the string
   s. On a match the position is advanced past s and true is returned;
   otherwise the position is left alone and false is returned. */
static int eq_s(struct tokeniser * t, char * s) {
    int len = strlen(s);
    int remaining = SIZE(t->p) - t->c;
    int k = 0;
    if (remaining < len) return false;
    while (k < len) {
        if (t->p[t->c + k] != s[k]) return false;
        k++;
    }
    t->c += len;
    return true;
}

/* Return true if ch is a newline, carriage return, tab or space.
   Side effect: every newline passed in bumps t->line_number, so each input
   newline should be shown to this function exactly once. */
static int white_space(struct tokeniser * t, int ch) {
    if (ch == '\n') {
        t->line_number++;
        return true;
    }
    if (ch == '\r' || ch == '\t' || ch == ' ') return true;
    return false;
}

/* Look up the n symbols at p among the name/value pairs in t->m_pairs.
   Returns the value block of the first pair whose name matches, or 0 if no
   pair matches. */
static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
    struct m_pair * pair;
    for (pair = t->m_pairs; pair != 0; pair = pair->next) {
        symbol * name = pair->name;
        if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) return pair->value;
    }
    return 0;
}

/* Read the body of a literal string. c is the index of the first character
   after the opening quote (next_token() passes the quote's index + 1).
   The decoded text is accumulated in t->b, which is emptied first.
   Returns the index just past the closing quote, or the index reached when
   an error was reported (end of text, or a newline inside the string). */
static int read_literal_string(struct tokeniser * t, int c) {
symbol * p = t->p;
int ch;
SIZE(t->b) = 0;
repeat {
if (c >= SIZE(p)) { error2(t, "'"); return c; }
ch = p[c];
if (ch == '\n') { error1(t, "string not terminated"); return c; }
c++;
/* An escape: everything from the m_start character up to the m_end
   character is a macro name to be looked up. */
if (ch == t->m_start) {
int c0 = c;
int newlines = false; /* no newlines as yet */
int black_found = false; /* no printing chars as yet */
repeat {
if (c >= SIZE(p)) { error2(t, "'"); return c; }
ch = p[c]; c++;
if (ch == t->m_end) break;
unless (white_space(t, ch)) black_found = true;
if (ch == '\n') newlines = true;
/* an escape may span lines only if it contains nothing but white space */
if (newlines && black_found) {
error1(t, "string not terminated");
return c;
}
}
/* an escape that contained a newline contributes nothing to the string */
unless (newlines) {
int n = c - c0 - 1; /* macro size */
int firstch = p[c0];
symbol * q = find_in_m(t, n, p + c0);
if (q == 0) {
/* undeclared one-character names that are a quote or the escape-start
   character stand for themselves */
if (n == 1 && (firstch == '\'' || firstch == t->m_start))
t->b = add_to_b(t->b, 1, p + c0);
else
error(t, "string macro '", n, p + c0, "' undeclared");
} else
t->b = add_to_b(t->b, SIZE(q), q);
}
} else {
/* closing quote ends the string; anything else is copied as is */
if (ch == '\'') return c;
t->b = add_to_b(t->b, 1, p + c - 1);
}
}
}

/* Scan the next raw token from t->p, starting at position t->c.
   Returns the token code and leaves t->c just past the token, or returns -1
   when the end of the input is reached.
   - a word starting with a letter is either a system word (its code is
     returned) or a name (c_name, with the text left in t->b);
   - a run of digits gives c_number with the value in t->number;
   - a quote starts a literal string (c_literalstring, text in t->b);
   - anything else is tried as a two-character, then a one-character
     system word.
   A character that matches none of these is reported and skipped. */
static int next_token(struct tokeniser * t) {
symbol * p = t->p;
int c = t->c;
int ch;
int code = -1;
repeat {
if (c >= SIZE(p)) { t->c = c; return -1; }
ch = p[c];
if (white_space(t, ch)) { c++; continue; }
if (isalpha(ch)) {
int c0 = c;
while (c < SIZE(p) && (isalnum(p[c]) || p[c] == '_')) c++;
code = find_word(c - c0, p + c0);
if (code < 0) {
t->b = move_to_b(t->b, c - c0, p + c0);
code = c_name;
}
} else
if (isdigit(ch)) {
int c0 = c;
while (c < SIZE(p) && isdigit(p[c])) c++;
t->number = get_number(c - c0, p + c0);
code = c_number;
} else
if (ch == '\'') {
c = read_literal_string(t, c + 1);
code = c_literalstring;
} else
{
/* longest match first: at most two characters, fewer at end of input */
int lim = smaller(2, SIZE(p) - c);
int i;
for (i = lim; i > 0; i--) {
code = find_word(i, p + c);
if (code >= 0) { c += i; break; }
}
}
if (code >= 0) {
t->c = c;
return code;
}
error(t, "'", 1, p + c, "' unknown");
c++;
continue;
}
}

/* Return the character at the current position and step past it, or return
   -1 (without moving) when the input is exhausted. */
static int next_char(struct tokeniser * t) {
    int ch;
    if (t->c >= SIZE(t->p)) return -1;
    ch = t->p[t->c];
    t->c++;
    return ch;
}

/* Return the next character that is not white space, or -1 at the end of
   the input. */
static int next_real_char(struct tokeniser * t) {
    int ch;
    do {
        ch = next_char(t);
    } while (white_space(t, ch));
    return ch;
}

/* Read a run of non-white-space characters (used for the name that follows
   'stringdef') into t->b2. Leading white space is skipped; reading stops at
   the next white space character or at the end of the input. Reports an
   error and leaves t->b2 alone if the input ends before any character is
   found. */
static void read_chars(struct tokeniser * t) {
    int ch = next_real_char(t);
    int start;
    if (ch < 0) {
        error2(t, "stringdef");
        return;
    }
    start = t->c - 1;   /* index of the first character of the run */
    do {
        ch = next_char(t);
    } while (!(white_space(t, ch) || ch < 0));
    t->b2 = move_to_b(t->b2, t->c - start - 1, t->p + start);
}

/* Return the value (0-9) of the decimal digit character ch, or -1 if ch is
   not a decimal digit. */
static int decimal_to_num(int ch) {
    return (ch >= '0' && ch <= '9') ? ch - '0' : -1;
}

/* Return the value (0-15) of the hexadecimal digit character ch, or -1 if
   ch is not one. Only lower-case a-f are accepted; callers fold case
   beforehand. */
static int hex_to_num(int ch) {
    if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a');
    if (ch >= '0' && ch <= '9') return ch - '0';
    return -1;
}

/* Convert, in place, a block p holding space-separated numbers written in
   the given base (10, or otherwise treated as hex) into the characters those
   numbers denote. Each number becomes one symbol, or a UTF-8 sequence when
   t->utf8 is set. SIZE(p) is updated to the converted length.
   An error is reported, and p is left partly converted with its size
   unchanged, if a number contains an invalid digit or exceeds 0xffff
   (wide characters / UTF-8) or 0xff (otherwise). */
static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
    int c = 0; int d = 0;   /* c: read position, d: write position */
    repeat {
        while (c < SIZE(p) && p[c] == ' ') c++;
        if (c == SIZE(p)) break;
        {
            int number = 0;
            repeat {
                int ch;
                /* check the bound before reading, so p[SIZE(p)] is never
                   examined */
                if (c == SIZE(p)) break;
                ch = p[c];
                if (ch == ' ') break;
                if (base == 10) {
                    ch = decimal_to_num(ch);
                    if (ch < 0) {
                        error1(t, "decimal string contains non-digits");
                        return;
                    }
                } else {
                    ch = hex_to_num(tolower(ch));
                    if (ch < 0) {
                        error1(t, "hex string contains non-hex characters");
                        return;
                    }
                }
                /* Stop accumulating once the value is already beyond every
                   permitted limit: it stays out of range for the checks
                   below, and a long run of digits can no longer overflow
                   the int. */
                if (number <= 0xffff) number = base * number + ch;
                c++;
            }
            if (t->widechars || t->utf8) {
                unless (0 <= number && number <= 0xffff) {
                    error1(t, "character values exceed 64K");
                    return;
                }
            } else {
                unless (0 <= number && number <= 0xff) {
                    error1(t, "character values exceed 256");
                    return;
                }
            }
            if (t->utf8)
                d += put_utf8(number, p + d);
            else
                p[d++] = number;
        }
    }
    SIZE(p) = d;
}

/* Return the next token for the caller, handling everything the tokeniser
   deals with itself:
   - a token held back earlier (t->token_held) is returned again;
   - both comment forms are skipped;
   - 'stringescapes' records the two escape characters;
   - 'stringdef' records a name/value pair in t->m_pairs, converting the
     value first if 'hex' or 'decimal' was given;
   - 'get' saves the current input state, switches to the named file and
     continues reading from it; when that file ends the saved state is
     restored.
   Any other token code (including -1 at the end of the outermost input) is
   stored in t->token, with the previous one kept in t->previous_token, and
   returned. */
extern int read_token(struct tokeniser * t) {
symbol * p = t->p;
int held = t->token_held;
t->token_held = false;
if (held) return t->token;
repeat {
int code = next_token(t);
switch (code) {
case c_comment1: /* slash-slash comment */
while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
continue;
case c_comment2: /* slash-star comment */
repeat {
if (t->c >= SIZE(p)) {
error1(t, "/* comment not terminated");
t->token = -1;
return -1;
}
/* newlines inside the comment are counted here because the comment
   text never passes through white_space() */
if (p[t->c] == '\n') t->line_number++;
if (eq_s(t, "*/")) break;
t->c++;
}
continue;
case c_stringescapes:
{
int ch1 = next_real_char(t);
int ch2 = next_real_char(t);
if (ch2 < 0)
{ error2(t, "stringescapes"); continue; }
if (ch1 == '\'')
{ error1(t, "first stringescape cannot be '"); continue; }
t->m_start = ch1;
t->m_end = ch2;
}
continue;
case c_stringdef:
{
int base = 0;
/* the macro name goes to t->b2, the literal that follows to t->b */
read_chars(t);
code = read_token(t);
if (code == c_hex) { base = 16; code = read_token(t); } else
if (code == c_decimal) { base = 10; code = read_token(t); }
unless (code == c_literalstring)
{ error1(t, "string omitted after stringdef"); continue; }
if (base > 0) convert_numeric_string(t, t->b, base);
/* push the new pair on the front of the list */
{ NEW(m_pair, q);
q->next = t->m_pairs;
q->name = copy_b(t->b2);
q->value = copy_b(t->b);
t->m_pairs = q;
}
}
continue;
case c_get:
code = read_token(t);
unless (code == c_literalstring) {
error1(t, "string omitted after get"); continue;
}
t->get_depth++;
if (t->get_depth > 10) {
fprintf(stderr, "get directives go 10 deep. Looping?\n");
exit(1);
}
{
char * file;
NEW(input, q);
/* try the name as given, then prefixed by each entry of t->includes */
symbol * u = get_input(t->b, &file);
if (u == 0) {
struct include * r = t->includes;
until (r == 0) {
symbol * b = copy_b(r->b);
b = add_to_b(b, SIZE(t->b), t->b);
u = get_input(b, &file);
lose_b(b);
unless (u == 0) break;
r = r->next;
}
}
if (u == 0) {
error(t, "Can't get '", SIZE(t->b), t->b, "'");
exit(1);
}
/* save the first sizeof(struct input) bytes of the tokeniser in q,
   then point the tokeniser at the new file */
memmove(q, t, sizeof(struct input));
t->next = q;
t->p = u;
t->c = 0;
t->file = file;
t->line_number = 1;
}
p = t->p;
continue;
case -1:
/* end of an included file: drop its text and restore the saved state */
unless (t->next == 0) {
lose_b(p);
{
struct input * q = t->next;
memmove(t, q, sizeof(struct input)); p = t->p;
FREE(q);
}
t->get_depth--;
continue;
}
/* drop through */
default:
t->previous_token = t->token;
t->token = code;
return code;
}
}
}

extern const char * name_of_token(int code) {
int i;
for (i = 1; i < vocab->code; i++)
if ((vocab + i)->code == code) return (const char *)(vocab + i)->s;
switch (code) {
case c_mathassign: return "=";
case c_name: return "name";
case c_number: return "number";
case c_literalstring:return "literal";
case c_neg: return "neg";
case c_grouping: return "grouping";
case c_call: return "call";
case c_booltest: return "Boolean test";
case -2: return "start of text";
case -1: return "end of text";
default: return "?";
}
}

/* Create a tokeniser that reads the text in block p, reporting 'file' as the
   source name in error messages. Release it with close_tokeniser(). */
extern struct tokeniser * create_tokeniser(symbol * p, char * file) {
    NEW(tokeniser, t);

    /* input being read */
    t->p = p;
    t->c = 0;
    t->file = file;
    t->line_number = 1;
    t->next = 0;
    t->get_depth = 0;

    /* working buffers */
    t->b = create_b(0);
    t->b2 = create_b(0);

    /* string escapes: none declared yet */
    t->m_start = -1;
    t->m_pairs = 0;

    /* token state: -2 is the code name_of_token() calls "start of text" */
    t->token = -2;
    t->previous_token = -2;
    t->token_held = false;
    t->error_count = 0;
    return t;
}

extern void close_tokeniser(struct tokeniser * t) {
lose_b(t->b);
lose_b(t->b2);
{
struct m_pair * q = t->m_pairs;
until (q == 0) {
struct m_pair * q_next = q->next;
lose_b(q->name);
lose_b(q->value);
FREE(q);
q = q_next;
}
}
{
struct input * q = t->next;
until (q == 0) {
struct input * q_next = q->next;
FREE(q);
q = q_next;
}
}
free(t->file);
FREE(t);
}

+ 15
- 0
contrib/snowball/doc/TODO View File

@@ -0,0 +1,15 @@
Things to do:

- Write documentation for how to use libstemmer (as opposed to how stemming
algorithms themselves work).
Currently, the documentation in the include/libstemmer.h header file is
pretty clear and comprehensive, but an overview document wouldn't go amiss.

Things that would be nice to include at some point.

- Add version numbers to each stemming algorithm, and allow the interface to
request a specific version of the stemming algorithms. Default to providing
the latest version of the algorithm.
- Make mkmodules.pl generate the build system, instead of being called from it.
This would allow it to generate the list of modules to be built, so that it's
not necessary to change things in more than one place to add a new algorithm.

+ 125
- 0
contrib/snowball/doc/libstemmer_c_README View File

@@ -0,0 +1,125 @@
libstemmer_c
============

This document pertains to the C version of the libstemmer distribution,
available for download from:

http://snowball.tartarus.org/dist/libstemmer_c.tgz


Compiling the library
=====================

A simple makefile is provided for Unix style systems. On such systems, it
should be possible simply to run "make", and the file "libstemmer.o"
and the example program "stemwords" will be generated.

If this doesn't work on your system, you need to write your own build
system (or call the compiler directly). The files to compile are
all contained in the "libstemmer", "runtime" and "src_c" directories,
and the public header file is contained in the "include" directory.

The library comes in two flavours; UTF-8 only, and UTF-8 plus other character
sets. To use the utf-8 only flavour, compile "libstemmer_utf8.c" instead of
"libstemmer.c".

For convenience "mkinc.mak" is a makefile fragment listing the source files and
header files used to compile the standard version of the library.
"mkinc_utf8.mak" is a comparable makefile fragment listing just the source
files for the UTF-8 only version of the library.


Using the library
=================

The library provides a simple C API. Essentially, a new stemmer can
be obtained by using "sb_stemmer_new". "sb_stemmer_stem" is then
used to stem a word, "sb_stemmer_length" returns the stemmed
length of the last word processed, and "sb_stemmer_delete" is
used to delete a stemmer.

Creating a stemmer is a relatively expensive operation - the expected
usage pattern is that a new stemmer is created when needed, used
to stem many words, and deleted after some time.

Stemmers are re-entrant, but not threadsafe. In other words, if
you wish to access the same stemmer object from multiple threads,
you must ensure that all access is protected by a mutex or similar
device.

libstemmer does not currently incorporate any mechanism for caching the results
of stemming operations. Such caching can greatly increase the performance of a
stemmer under certain situations, so suitable patches will be considered for
inclusion.

The standard libstemmer sources contain an algorithm for each of the supported
languages. The algorithm may be selected using the English name of the
language, or using the 2 or 3 letter ISO 639 language codes. In addition,
the traditional "Porter" stemming algorithm for English is included for
backwards compatibility purposes, but we recommend use of the "English"
stemmer in preference for new projects.

(Some minor algorithms which are included only as curiosities in the snowball
website, such as the Lovins stemmer and the Kraaij Pohlmann stemmer, are not
included in the standard libstemmer sources. These are not really supported by
the snowball project, but it would be possible to compile a modified libstemmer
library containing these if desired.)


The stemwords example
=====================

The stemwords example program allows you to run any of the stemmers
compiled into the libstemmer library on a sample vocabulary. For
details on how to use it, run it with the "-h" command line option.


Using the library in a larger system
====================================

If you are incorporating the library into the build system of a larger
program, I recommend copying the unpacked tarball without modification into
a subdirectory of the sources of your program. Future versions of the
library are intended to keep the same structure, so this will keep the
work required to move to a new version of the library to a minimum.

As an additional convenience, the list of source and header files used
in the library is detailed in mkinc.mak - a file which is in a suitable
format for inclusion by a Makefile. By including this file in your build
system, you can link the snowball system into your program with a few
extra rules.

Using the library in a system using GNU autotools
=================================================

The libstemmer_c library can be integrated into a larger system which uses the
GNU autotool framework (and in particular, automake and autoconf) as follows:

1) Unpack libstemmer_c.tgz in the top level project directory so that there is
a libstemmer_c subdirectory of the top level directory of the project.

2) Add a file "Makefile.am" to the unpacked libstemmer_c folder, containing:
noinst_LTLIBRARIES = libstemmer.la
include $(srcdir)/mkinc.mak
noinst_HEADERS = $(snowball_headers)
libstemmer_la_SOURCES = $(snowball_sources)

(You may also need to add other lines to this, for example, if you are using
compiler options which are not compatible with compiling the libstemmer
library.)

3) Add libstemmer_c to the AC_CONFIG_FILES declaration in the project's
configure.ac file.

4) Add to the top level makefile the following lines (or modify existing
assignments to these variables appropriately):

AUTOMAKE_OPTIONS = subdir-objects
AM_CPPFLAGS = -I$(top_srcdir)/libstemmer_c/include
SUBDIRS=libstemmer_c
<name>_LIBADD = libstemmer_c/libstemmer.la

(Where <name> is the name of the library or executable which links against
libstemmer.)


+ 40
- 0
contrib/snowball/doc/libstemmer_java_README View File

@@ -0,0 +1,40 @@
libstemmer_java
===============

This document pertains to the Java version of the libstemmer distribution,
available for download from:

http://snowball.tartarus.org/dist/libstemmer_java.tgz


Compiling the library
=====================

Simply run the java compiler on all the java source files under the java
directory. For example, this can be done under unix by changing directory into
the java directory, and running:

javac org/tartarus/snowball/*.java org/tartarus/snowball/ext/*.java

This will compile the library and also an example program "TestApp" which
provides a command line interface to the library.


Using the library
=================

There is currently no formal documentation on the use of the Java version
of the library. Additionally, its interface is not guaranteed to be
stable.

The best documentation of the library is the source of the TestApp example
program.


The TestApp example
===================

The TestApp example program allows you to run any of the stemmers
compiled into the libstemmer library on a sample vocabulary. For
details on how to use it, run it with no command line parameters.


+ 209
- 0
contrib/snowball/examples/stemwords.c View File

@@ -0,0 +1,209 @@
/* This is a simple program which uses libstemmer to provide a command
* line interface for stemming using any of the algorithms provided.
*/

#include <stdio.h>
#include <stdlib.h> /* for malloc, free */
#include <string.h> /* for memmove */
#include <ctype.h> /* for isupper, tolower */

#include "libstemmer.h"

const char * progname;
static int pretty = 1;

/* Stem every line of f_in with 'stemmer' and write the results to f_out.
 *
 * Each input line (ended by '\n' or end of file) is treated as one word.
 * Upper-case characters (as judged by isupper) are folded to lower case
 * before stemming. The output layout follows the file-level 'pretty' flag:
 *   1     - the word, " -> ", then the stem;
 *   2     - the word, then the stem padded out to column 30 (or on a new
 *           line when the word is 30 or more characters long);
 *   other - the stem alone.
 *
 * If memory for the line buffer cannot be obtained, or the stemmer reports
 * failure, "Out of memory" is written to stderr and the program exits with
 * status 1. Previously a failed initial malloc was written through, and a
 * failed realloc silently dropped the rest of the input.
 */
static void
stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
{
#define INC 10
    int lim = INC;
    sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));
    if (b == 0) goto error;

    while(1) {
        int ch = getc(f_in);
        if (ch == EOF) {
            free(b); return;
        }
        {
            int i = 0;
            int inlen = 0;
            while(1) {
                if (ch == '\n' || ch == EOF) break;
                if (i == lim) {
                    /* grow the line buffer by INC symbols */
                    sb_symbol * newb;
                    newb = (sb_symbol *)
                           realloc(b, (lim + INC) * sizeof(sb_symbol));
                    if (newb == 0) goto error;
                    b = newb;
                    lim = lim + INC;
                }
                /* Update count of utf-8 characters. */
                if (ch < 0x80 || ch > 0xBF) inlen += 1;
                /* force lower case: */
                if (isupper(ch)) ch = tolower(ch);

                b[i] = ch;
                i++;
                ch = getc(f_in);
            }

            {
                const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
                if (stemmed == NULL) goto error;

                if (pretty == 1) {
                    fwrite(b, i, 1, f_out);
                    fputs(" -> ", f_out);
                } else if (pretty == 2) {
                    fwrite(b, i, 1, f_out);
                    if (sb_stemmer_length(stemmer) > 0) {
                        int j;
                        if (inlen < 30) {
                            for (j = 30 - inlen; j > 0; j--)
                                fputs(" ", f_out);
                        } else {
                            fputs("\n", f_out);
                            for (j = 30; j > 0; j--)
                                fputs(" ", f_out);
                        }
                    }
                }

                fputs((const char *)stemmed, f_out);
                putc('\n', f_out);
            }
        }
    }
error:
    free(b);   /* free(NULL) is harmless if the first malloc failed */
    fprintf(stderr, "Out of memory");
    exit(1);
}

/** Display the command line syntax, and then exit.
* The text is written to standard output with printf, with the program name
* taken from the file-level 'progname'. This function does not return.
* @param n The value to exit with.
*/
static void
usage(int n)
{
printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
"\n"
"The input file consists of a list of words to be stemmed, one per\n"
"line. Words should be in lower case, but (for English) A-Z letters\n"
"are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
"used.\n"
"\n"
"If -c is given, the argument is the character encoding of the input\n"
"and output files. If it is omitted, the UTF-8 encoding is used.\n"
"\n"
"If -p is given the output file consists of each word of the input\n"
"file followed by \"->\" followed by its stemmed equivalent.\n"
"If -p2 is given the output file is a two column layout containing\n"
"the input words in the first column and the stemmed equivalents in\n"
"the second column.\n"
"Otherwise, the output file consists of the stemmed words, one per\n"
"line.\n"
"\n"
"-h displays this help\n",
progname);
exit(n);
}

/* Fetch the argument that must follow option 'opt': returns argv[*next] and
 * advances *next, or prints "<opt> requires an argument" and exits with
 * status 1 if the command line has no further entry. */
static char *
option_argument(const char * opt, int argc, char * argv[], int * next)
{
    if (*next >= argc) {
        fprintf(stderr, "%s requires an argument\n", opt);
        exit(1);
    }
    return argv[(*next)++];
}

/* Command line entry point: parse the options, open the input and output
 * (stdin / stdout unless -i / -o name a file), create the requested stemmer
 * and run stem_file() over the input. Exits with status 1 on a bad command
 * line, an unopenable file or an unavailable language/encoding; returns 0
 * otherwise. */
int
main(int argc, char * argv[])
{
    char * in = 0;
    char * out = 0;
    char * language = "english";
    char * charenc = NULL;
    FILE * f_in;
    FILE * f_out;
    struct sb_stemmer * stemmer;
    int i = 1;

    pretty = 0;
    progname = argv[0];

    while (i < argc) {
        char * s = argv[i++];
        if (s[0] != '-') {
            fprintf(stderr, "unexpected parameter %s\n", s);
            usage(1);
        } else if (strcmp(s, "-o") == 0) {
            out = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-i") == 0) {
            in = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-l") == 0) {
            language = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-c") == 0) {
            charenc = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-p2") == 0) {
            pretty = 2;
        } else if (strcmp(s, "-p") == 0) {
            pretty = 1;
        } else if (strcmp(s, "-h") == 0) {
            usage(0);
        } else {
            fprintf(stderr, "option %s unknown\n", s);
            usage(1);
        }
    }

    /* prepare the files */
    if (in == 0) {
        f_in = stdin;
    } else {
        f_in = fopen(in, "r");
    }
    if (f_in == 0) {
        fprintf(stderr, "file %s not found\n", in);
        exit(1);
    }
    if (out == 0) {
        f_out = stdout;
    } else {
        f_out = fopen(out, "w");
    }
    if (f_out == 0) {
        fprintf(stderr, "file %s cannot be opened\n", out);
        exit(1);
    }

    /* do the stemming process: */
    stemmer = sb_stemmer_new(language, charenc);
    if (stemmer == 0) {
        if (charenc == NULL)
            fprintf(stderr, "language `%s' not available for stemming\n", language);
        else
            fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
        exit(1);
    }
    stem_file(stemmer, f_in, f_out);
    sb_stemmer_delete(stemmer);

    /* only close streams this program opened itself */
    if (in != 0) (void) fclose(f_in);
    if (out != 0) (void) fclose(f_out);

    return 0;
}


+ 79
- 0
contrib/snowball/include/libstemmer.h View File

@@ -0,0 +1,79 @@

/* Make header file work when included from C++ */
#ifdef __cplusplus
extern "C" {
#endif

struct sb_stemmer;
typedef unsigned char sb_symbol;

/* FIXME - should be able to get a version number for each stemming
* algorithm (which will be incremented each time the output changes). */

/** Returns an array of the names of the available stemming algorithms.
* Note that these are the canonical names - aliases (ie, other names for
* the same algorithm) will not be included in the list.
* The list is terminated with a null pointer.
*
* The list must not be modified in any way.
*/
const char ** sb_stemmer_list(void);

/** Create a new stemmer object, using the specified algorithm, for the
* specified character encoding.
*
* All algorithms will usually be available in UTF-8, but may also be
* available in other character encodings.
*
* @param algorithm The algorithm name. This is either the english
* name of the algorithm, or the 2 or 3 letter ISO 639 codes for the
* language. Note that case is significant in this parameter - the
* value should be supplied in lower case.
*
* @param charenc The character encoding. NULL may be passed as
* this value, in which case UTF-8 encoding will be assumed. Otherwise,
* the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1),
* "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian). Note that
* case is significant in this parameter.
*
* @return NULL if the specified algorithm is not recognised, or the
* algorithm is not available for the requested encoding. Otherwise,
* returns a pointer to a newly created stemmer for the requested algorithm.
* The returned pointer must be deleted by calling sb_stemmer_delete().
*
* @note NULL will also be returned if an out of memory error occurs.
*/
struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);

/** Delete a stemmer object.
*
* This frees all resources allocated for the stemmer. After calling
* this function, the supplied stemmer may no longer be used in any way.
*
* It is safe to pass a null pointer to this function - this will have
* no effect.
*/
void sb_stemmer_delete(struct sb_stemmer * stemmer);

/** Stem a word.
*
* The return value is owned by the stemmer - it must not be freed or
* modified, and it will become invalid when the stemmer is called again,
* or if the stemmer is freed.
*
* The length of the return value can be obtained using sb_stemmer_length().
*
* If an out-of-memory error occurs, this will return NULL.
*/
const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer,
const sb_symbol * word, int size);

/** Get the length of the result of the last stemmed word.
* This should not be called before sb_stemmer_stem() has been called.
*/
int sb_stemmer_length(struct sb_stemmer * stemmer);

#ifdef __cplusplus
}
#endif


+ 31
- 0
contrib/snowball/java/org/tartarus/snowball/Among.java View File

@@ -0,0 +1,31 @@
package org.tartarus.snowball;

import java.lang.reflect.Method;

/**
 * One entry of a Snowball "among" table: a search string, the result code
 * returned when it matches, a link to the longest entry that is a substring
 * of this one, and an optional condition method invoked by reflection.
 */
public class Among {
    /** Length of the search string, in chars. */
    public final int s_size;
    /** The search string, as a char array. */
    public final char[] s;
    /** Index of the longest matching substring entry; negative ends the chain. */
    public final int substring_i;
    /** Result of the lookup. */
    public final int result;
    /** Method to call if the string matches, or null when there is none. */
    public final Method method;
    /** Object to invoke {@link #method} on. */
    public final SnowballProgram methodobject;

    /**
     * @param s            the search string
     * @param substring_i  index of the longest matching substring entry
     * @param result       value returned by the lookup when this entry matches
     * @param methodname   name of a no-argument method declared on the class
     *                     of {@code methodobject}; the empty string means
     *                     "no method"
     * @param methodobject object the method is looked up on and invoked on
     * @throws RuntimeException wrapping NoSuchMethodException when the named
     *                          method is not declared on that class
     */
    public Among (String s, int substring_i, int result,
                  String methodname, SnowballProgram methodobject) {
        this.s_size = s.length();
        this.s = s.toCharArray();
        this.substring_i = substring_i;
        this.result = result;
        this.methodobject = methodobject;
        this.method = lookup(methodname, methodobject);
    }

    /* Resolve the named no-argument method on target's class; "" yields null. */
    private static Method lookup(String name, SnowballProgram target) {
        if (name.length() == 0) {
            return null;
        }
        try {
            return target.getClass().getDeclaredMethod(name, new Class[0]);
        } catch (NoSuchMethodException e) {
            throw new RuntimeException(e);
        }
    }
};

+ 432
- 0
contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java View File

@@ -0,0 +1,432 @@

package org.tartarus.snowball;
import java.lang.reflect.InvocationTargetException;

public class SnowballProgram {
/**
 * Creates a program with a fresh working buffer and initialises it to the
 * empty string via setCurrent("").
 */
protected SnowballProgram()
{
current = new StringBuffer();
setCurrent("");
}

/**
 * Set the current string.
 *
 * Replaces the whole working buffer with {@code value} and resets the
 * positions: cursor and bra go to 0, limit and ket to the new length,
 * limit_backward to 0.
 *
 * @param value the new string to work on
 */
public void setCurrent(String value)
{
    current.replace(0, current.length(), value);

    final int length = current.length();
    limit_backward = 0;
    cursor = 0;
    bra = 0;
    limit = length;
    ket = length;
}

/**
 * Get the current string.
 *
 * Returns the contents of the working buffer and then replaces the buffer
 * with a new, empty StringBuffer. The position fields (cursor, limit,
 * limit_backward, bra, ket) are not modified here.
 *
 * @return the working buffer's contents as a String
 */
public String getCurrent()
{
String result = current.toString();
// Make a new StringBuffer. If we reuse the old one, and a user of
// the library keeps a reference to the buffer returned (for example,
// by converting it to a String in a way which doesn't force a copy),
// the buffer size will not decrease, and we will risk wasting a large
// amount of memory.
// Thanks to Wolfram Esser for spotting this problem.
current = new StringBuffer();
return result;
}

// current string
protected StringBuffer current;

protected int cursor;
protected int limit;
protected int limit_backward;
protected int bra;
protected int ket;

/**
 * Copies the working state of another program into this one.
 *
 * Note that the buffer is shared, not cloned: after this call both
 * programs reference the same StringBuffer object.
 *
 * @param other the program whose buffer reference and positions are taken
 */
protected void copy_from(SnowballProgram other)
{
current = other.current;
cursor = other.cursor;
limit = other.limit;
limit_backward = other.limit_backward;
bra = other.bra;
ket = other.ket;
}

/**
 * Forward test: is the char at the cursor a member of a grouping?
 *
 * The grouping is a bitmap: char {@code ch} is a member when bit
 * {@code (ch - min) & 7} of {@code s[(ch - min) >> 3]} is set.
 * On success the cursor moves one char forward.
 *
 * @param s   bitmap of the grouping
 * @param min smallest char value covered by the bitmap
 * @param max largest char value covered by the bitmap
 * @return true if the cursor is before limit and the char is in the grouping
 */
protected boolean in_grouping(char [] s, int min, int max)
{
    if (cursor < limit) {
        final char ch = current.charAt(cursor);
        if (ch >= min && ch <= max) {
            final char bit = (char) (ch - min);
            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) {
                cursor++;
                return true;
            }
        }
    }
    return false;
}

/**
 * Backward test: is the char just before the cursor a member of a grouping?
 *
 * Uses the same bitmap layout as the forward variant. On success the
 * cursor moves one char backward.
 *
 * @param s   bitmap of the grouping
 * @param min smallest char value covered by the bitmap
 * @param max largest char value covered by the bitmap
 * @return true if the cursor is after limit_backward and the char is in the
 *         grouping
 */
protected boolean in_grouping_b(char [] s, int min, int max)
{
    if (cursor > limit_backward) {
        final char ch = current.charAt(cursor - 1);
        if (ch >= min && ch <= max) {
            final char bit = (char) (ch - min);
            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) {
                cursor--;
                return true;
            }
        }
    }
    return false;
}

/**
 * Forward test: is the char at the cursor NOT a member of a grouping?
 *
 * A char outside [min, max], or whose bitmap bit is clear, is "out".
 * On success the cursor moves one char forward.
 *
 * @param s   bitmap of the grouping
 * @param min smallest char value covered by the bitmap
 * @param max largest char value covered by the bitmap
 * @return true if the cursor is before limit and the char is not in the
 *         grouping
 */
protected boolean out_grouping(char [] s, int min, int max)
{
    if (cursor >= limit) {
        return false;
    }
    final char ch = current.charAt(cursor);
    boolean member = false;
    if (ch >= min && ch <= max) {
        final char bit = (char) (ch - min);
        member = (s[bit >> 3] & (1 << (bit & 7))) != 0;
    }
    if (member) {
        return false;
    }
    cursor++;
    return true;
}

/**
 * Backward test: is the char just before the cursor NOT a member of a
 * grouping?
 *
 * A char outside [min, max], or whose bitmap bit is clear, is "out".
 * On success the cursor moves one char backward.
 *
 * @param s   bitmap of the grouping
 * @param min smallest char value covered by the bitmap
 * @param max largest char value covered by the bitmap
 * @return true if the cursor is after limit_backward and the char is not in
 *         the grouping
 */
protected boolean out_grouping_b(char [] s, int min, int max)
{
    if (cursor <= limit_backward) {
        return false;
    }
    final char ch = current.charAt(cursor - 1);
    boolean member = false;
    if (ch >= min && ch <= max) {
        final char bit = (char) (ch - min);
        member = (s[bit >> 3] & (1 << (bit & 7))) != 0;
    }
    if (member) {
        return false;
    }
    cursor--;
    return true;
}

/**
 * Forward test: does the char at the cursor lie in [min, max]?
 * On success the cursor moves one char forward.
 *
 * @return true if the cursor is before limit and the char is in range
 */
protected boolean in_range(int min, int max)
{
    if (cursor < limit) {
        final char ch = current.charAt(cursor);
        if (min <= ch && ch <= max) {
            cursor++;
            return true;
        }
    }
    return false;
}

/**
 * Backward test: does the char just before the cursor lie in [min, max]?
 * On success the cursor moves one char backward.
 *
 * @return true if the cursor is after limit_backward and the char is in range
 */
protected boolean in_range_b(int min, int max)
{
    if (cursor > limit_backward) {
        final char ch = current.charAt(cursor - 1);
        if (min <= ch && ch <= max) {
            cursor--;
            return true;
        }
    }
    return false;
}

/**
 * Forward test: does the char at the cursor lie outside [min, max]?
 * On success the cursor moves one char forward.
 *
 * @return true if the cursor is before limit and the char is out of range
 */
protected boolean out_range(int min, int max)
{
    if (cursor >= limit) {
        return false;
    }
    final char ch = current.charAt(cursor);
    final boolean inside = min <= ch && ch <= max;
    if (inside) {
        return false;
    }
    cursor++;
    return true;
}

/**
 * Backward test: does the char just before the cursor lie outside
 * [min, max]? On success the cursor moves one char backward.
 *
 * @return true if the cursor is after limit_backward and the char is out of
 *         range
 */
protected boolean out_range_b(int min, int max)
{
    if (cursor <= limit_backward) {
        return false;
    }
    final char ch = current.charAt(cursor - 1);
    final boolean inside = min <= ch && ch <= max;
    if (inside) {
        return false;
    }
    cursor--;
    return true;
}

/**
 * Forward literal match: do the next s_size chars at the cursor equal the
 * first s_size chars of s? On success the cursor advances past them.
 *
 * @param s_size number of chars to compare
 * @param s      the string to compare against
 * @return true on a full match; false (cursor unchanged) otherwise
 */
protected boolean eq_s(int s_size, String s)
{
    if (limit - cursor < s_size) {
        return false;
    }
    final int start = cursor;
    int k = 0;
    while (k != s_size) {
        if (current.charAt(start + k) != s.charAt(k)) {
            return false;
        }
        k++;
    }
    cursor = start + s_size;
    return true;
}

/**
 * Backward literal match: do the s_size chars ending at the cursor equal
 * the first s_size chars of s? On success the cursor retreats before them.
 *
 * @param s_size number of chars to compare
 * @param s      the string to compare against
 * @return true on a full match; false (cursor unchanged) otherwise
 */
protected boolean eq_s_b(int s_size, String s)
{
    if (cursor - limit_backward < s_size) {
        return false;
    }
    final int start = cursor - s_size;
    int k = 0;
    while (k != s_size) {
        if (current.charAt(start + k) != s.charAt(k)) {
            return false;
        }
        k++;
    }
    cursor = start;
    return true;
}

/**
 * Forward match against the full contents of a character sequence;
 * delegates to eq_s with the sequence's length and string form.
 */
protected boolean eq_v(CharSequence s)
{
return eq_s(s.length(), s.toString());
}

/**
 * Backward match against the full contents of a character sequence;
 * delegates to eq_s_b with the sequence's length and string form.
 */
protected boolean eq_v_b(CharSequence s)
{ return eq_s_b(s.length(), s.toString());
}

/**
 * Forward lookup of the text at the cursor in an among table.
 *
 * First a binary search over v[0..v_size) narrows down to a candidate
 * entry, comparing char by char (presumably v is sorted by search string;
 * the table is built elsewhere, so confirm against the generator). Then the
 * candidate and its substring_i chain are tried, longest first.
 *
 * An entry is accepted when it was matched in full and either has no
 * method or its method, invoked reflectively, yields "true". On acceptance
 * the cursor is left just past the matched string.
 *
 * If the reflective call throws InvocationTargetException or
 * IllegalAccessException, the entry is treated as not matching.
 *
 * @param v      the among table
 * @param v_size number of entries of v to search
 * @return the result of the accepted entry, or 0 when none is accepted
 */
protected int find_among(Among v[], int v_size)
{
int i = 0;
int j = v_size;

int c = cursor;
int l = limit;

// Number of leading chars already known to match v[i] and v[j-side].
int common_i = 0;
int common_j = 0;

boolean first_key_inspected = false;

while(true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; // smaller
Among w = v[k];
int i2;
for (i2 = common; i2 < w.s_size; i2++) {
if (c + common == l) {
// Ran into the limit: the text counts as smaller than w.s.
diff = -1;
break;
}
diff = current.charAt(c + common) - w.s[i2];
if (diff != 0) break;
common++;
}
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break; // v->s has been inspected
if (j == i) break; // only one item in v

// - but now we need to go round once more to get
// v->s inspected. This looks messy, but is actually
// the optimal approach.

if (first_key_inspected) break;
first_key_inspected = true;
}
}
// Walk from the candidate through ever shorter substring entries.
while(true) {
Among w = v[i];
if (common_i >= w.s_size) {
cursor = c + w.s_size;
if (w.method == null) return w.result;
boolean res;
try {
Object resobj = w.method.invoke(w.methodobject,
new Object[0]);
res = resobj.toString().equals("true");
} catch (InvocationTargetException e) {
res = false;
// FIXME - debug message
} catch (IllegalAccessException e) {
res = false;
// FIXME - debug message
}
// The invoked method may have moved the cursor; put it back.
cursor = c + w.s_size;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
}
}

// find_among_b is for backwards processing. Same comments apply
/**
 * Backward lookup of the text ending at the cursor in an among table.
 *
 * Mirrors find_among: strings are compared from their last char towards
 * their first, against the text running backwards from the cursor down to
 * limit_backward. On acceptance the cursor is left just before the matched
 * string.
 *
 * @param v      the among table
 * @param v_size number of entries of v to search
 * @return the result of the accepted entry, or 0 when none is accepted
 */
protected int find_among_b(Among v[], int v_size)
{
int i = 0;
int j = v_size;

int c = cursor;
int lb = limit_backward;

int common_i = 0;
int common_j = 0;

boolean first_key_inspected = false;

while(true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
Among w = v[k];
int i2;
for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
if (c - common == lb) {
// Ran into the backward limit: the text counts as smaller than w.s.
diff = -1;
break;
}
diff = current.charAt(c - 1 - common) - w.s[i2];
if (diff != 0) break;
common++;
}
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = true;
}
}
// Walk from the candidate through ever shorter substring entries.
while(true) {
Among w = v[i];
if (common_i >= w.s_size) {
cursor = c - w.s_size;
if (w.method == null) return w.result;

boolean res;
try {
Object resobj = w.method.invoke(w.methodobject,
new Object[0]);
res = resobj.toString().equals("true");
} catch (InvocationTargetException e) {
res = false;
// FIXME - debug message
} catch (IllegalAccessException e) {
res = false;
// FIXME - debug message
}
// The invoked method may have moved the cursor; put it back.
cursor = c - w.s_size;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
}
}

/**
 * Replaces the chars between c_bra and c_ket in current by the chars in s.
 *
 * limit is shifted by the change in length. A cursor at or beyond c_ket
 * is shifted by the same amount; a cursor strictly inside the replaced
 * region is moved to c_bra.
 *
 * @param c_bra start of the region to replace (inclusive)
 * @param c_ket end of the region to replace (exclusive)
 * @param s     replacement text
 * @return the change in length (new length minus old length of the region)
 */
protected int replace_s(int c_bra, int c_ket, String s)
{
    final int removed = c_ket - c_bra;
    final int delta = s.length() - removed;

    current.replace(c_bra, c_ket, s);
    limit += delta;

    if (cursor >= c_ket) {
        cursor += delta;
    } else if (cursor > c_bra) {
        cursor = c_bra;
    }
    return delta;
}

/**
 * Sanity-checks the slice markers: expects
 * 0 <= bra <= ket <= limit <= current.length().
 *
 * A violation is only reported on System.err; the method returns normally
 * either way, so callers carry on with the operation regardless.
 */
protected void slice_check()
{
if (bra < 0 ||
bra > ket ||
ket > limit ||
limit > current.length()) // this line could be removed
{
System.err.println("faulty slice operation");
// FIXME: report error somehow.
/*
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
exit(1);
*/
}
}

/**
 * Replaces the current slice [bra, ket) with s, after running
 * slice_check(). The adjustment returned by replace_s is discarded; bra
 * and ket themselves are not updated here.
 */
protected void slice_from(String s)
{
slice_check();
replace_s(bra, ket, s);
}

/**
 * CharSequence overload: converts s to a String and delegates to
 * slice_from(String).
 */
protected void slice_from(CharSequence s)
{
slice_from(s.toString());
}

/**
 * Deletes the current slice by replacing it with the empty string.
 */
protected void slice_del()
{
slice_from("");
}

/**
 * Replaces [c_bra, c_ket) with s and keeps the slice markers consistent:
 * bra and ket are each shifted by the length change when they lie at or
 * after c_bra.
 *
 * @param c_bra start of the region to replace (inclusive)
 * @param c_ket end of the region to replace (exclusive)
 * @param s     text to insert
 */
protected void insert(int c_bra, int c_ket, String s)
{
    final int delta = replace_s(c_bra, c_ket, s);
    if (bra >= c_bra) {
        bra += delta;
    }
    if (ket >= c_bra) {
        ket += delta;
    }
}

/**
 * CharSequence overload: converts s to a String and delegates to
 * insert(int, int, String).
 */
protected void insert(int c_bra, int c_ket, CharSequence s)
{
insert(c_bra, c_ket, s.toString());
}

/**
 * Copy the slice into the supplied StringBuffer.
 *
 * The previous contents of s are replaced entirely by the chars of the
 * working buffer in [bra, ket). slice_check() is run first, but it only
 * reports problems and does not stop the copy.
 *
 * @param s destination buffer, overwritten
 * @return s itself
 */
protected StringBuffer slice_to(StringBuffer s)
{
    slice_check();
    s.replace(0, s.length(), current.substring(bra, ket));
    return s;
}

/**
 * Copy the slice into the supplied StringBuilder.
 *
 * The previous contents of s are replaced entirely by the chars of the
 * working buffer in [bra, ket). slice_check() is run first, but it only
 * reports problems and does not stop the copy.
 *
 * @param s destination builder, overwritten
 * @return s itself
 */
protected StringBuilder slice_to(StringBuilder s)
{
    slice_check();
    s.replace(0, s.length(), current.substring(bra, ket));
    return s;
}

/**
 * Overwrites s with the working buffer's contents from index 0 up to
 * limit, and returns s.
 */
protected StringBuffer assign_to(StringBuffer s)
{
s.replace(0, s.length(), current.substring(0, limit));
return s;
}

/**
 * Overwrites s with the working buffer's contents from index 0 up to
 * limit, and returns s.
 */
protected StringBuilder assign_to(StringBuilder s)
{
s.replace(0, s.length(), current.substring(0, limit));
return s;
}

/*
extern void debug(struct SN_env * z, int number, int line_count)
{ int i;
int limit = SIZE(z->p);
//if (number >= 0) printf("%3d (line %4d): '", number, line_count);
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++)
{ if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
}
printf("'\n");
}
*/

};

+ 7
- 0
contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java View File

@@ -0,0 +1,7 @@

package org.tartarus.snowball;
import java.lang.reflect.InvocationTargetException;

/**
 * Base class for stemmers: a SnowballProgram with a single abstract
 * stem() entry point that concrete stemmers implement.
 */
public abstract class SnowballStemmer extends SnowballProgram {
/** Runs the stemmer on the current string; the boolean is the algorithm's own result. */
public abstract boolean stem();
};

+ 77
- 0
contrib/snowball/java/org/tartarus/snowball/TestApp.java View File

@@ -0,0 +1,77 @@

package org.tartarus.snowball;

import java.lang.reflect.Method;
import java.io.Reader;
import java.io.Writer;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.OutputStream;
import java.io.FileOutputStream;

/**
 * Command-line driver: reads whitespace-separated words from a file,
 * lower-cases and stems each one, and writes one stem per line.
 *
 * Usage: TestApp &lt;algorithm&gt; &lt;input file&gt; [-o &lt;output file&gt;] [repeat]
 * where the optional fifth argument is how many times stem() is applied
 * to each word.
 */
public class TestApp {
    private static void usage()
    {
        System.err.println("Usage: TestApp <algorithm> <input file> [-o <output file>]");
    }

    /*
     * Stems the word held in `word` (applying stem() `repeat` times), writes
     * the result plus a newline to `output`, and empties `word`.
     */
    private static void stemWord(SnowballStemmer stemmer, StringBuffer word,
                                 int repeat, Writer output)
        throws java.io.IOException
    {
        stemmer.setCurrent(word.toString());
        // "> 0" rather than "!= 0": a negative repeat count must not spin
        // through the whole int range.
        for (int i = repeat; i > 0; i--) {
            stemmer.stem();
        }
        output.write(stemmer.getCurrent());
        output.write('\n');
        word.delete(0, word.length());
    }

    /**
     * @param args algorithm name, input file, then optionally
     *             "-o" and an output file, then optionally a repeat count
     * @throws Throwable any failure loading the stemmer class, parsing the
     *                   repeat count, or doing I/O is propagated
     */
    public static void main(String [] args) throws Throwable {
        if (args.length < 2) {
            usage();
            return;
        }

        Class stemClass = Class.forName("org.tartarus.snowball.ext." +
                                        args[0] + "Stemmer");
        SnowballStemmer stemmer = (SnowballStemmer) stemClass.newInstance();

        Reader reader =
            new BufferedReader(new InputStreamReader(new FileInputStream(args[1])));
        try {
            OutputStream outstream;
            if (args.length > 2) {
                if (args.length >= 4 && args[2].equals("-o")) {
                    outstream = new FileOutputStream(args[3]);
                } else {
                    usage();
                    return;
                }
            } else {
                outstream = System.out;
            }

            Writer output = new BufferedWriter(new OutputStreamWriter(outstream));
            try {
                int repeat = 1;
                if (args.length > 4) {
                    repeat = Integer.parseInt(args[4]);
                }

                StringBuffer input = new StringBuffer();
                int character;
                while ((character = reader.read()) != -1) {
                    char ch = (char) character;
                    if (Character.isWhitespace(ch)) {
                        if (input.length() > 0) {
                            stemWord(stemmer, input, repeat, output);
                        }
                    } else {
                        input.append(Character.toLowerCase(ch));
                    }
                }
                // The input may end without trailing whitespace; do not
                // drop the final word in that case.
                if (input.length() > 0) {
                    stemWord(stemmer, input, repeat, output);
                }
                output.flush();
            } finally {
                // Close files we opened ourselves; leave System.out open.
                if (outstream != System.out) {
                    output.close();
                }
            }
        } finally {
            reader.close();
        }
    }
}

+ 66
- 0
contrib/snowball/runtime/api.c View File

@@ -0,0 +1,66 @@

#include <stdlib.h> /* for calloc, free */
#include "header.h"

extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
{
struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
if (z == NULL) return NULL;
z->p = create_s();
if (z->p == NULL) goto error;
if (S_size)
{
int i;
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
if (z->S == NULL) goto error;

for (i = 0; i < S_size; i++)
{
z->S[i] = create_s();
if (z->S[i] == NULL) goto error;
}
}

if (I_size)
{
z->I = (int *) calloc(I_size, sizeof(int));
if (z->I == NULL) goto error;
}

if (B_size)
{
z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
if (z->B == NULL) goto error;
}

return z;
error:
SN_close_env(z, S_size);
return NULL;
}

/* Release an environment created by SN_create_env().
 *
 * z may be NULL (no-op). S_size must be the string-slot count the
 * environment was created with; it bounds the loop that frees z->S[i].
 *
 * This is also the cleanup path for a partially constructed environment:
 * SN_create_env() calls it when an allocation fails, possibly before z->S
 * was allocated. The string array is therefore only walked when it
 * actually exists; testing S_size alone would dereference a NULL z->S.
 */
extern void SN_close_env(struct SN_env * z, int S_size)
{
    if (z == NULL) return;
    if (z->S != NULL)
    {
        int i;
        for (i = 0; i < S_size; i++)
        {
            /* slots not yet filled are NULL; lose_s() is only handed what
               create_s() returned or the calloc'ed zero */
            lose_s(z->S[i]);
        }
        free(z->S);
    }
    free(z->I);
    free(z->B);
    if (z->p) lose_s(z->p);
    free(z);
}

/* Load a new word into the environment.
 *
 * Replaces everything in z->p from 0 up to the limit z->l with the `size`
 * symbols at s, then resets the cursor to 0 (even if the replace failed).
 * Returns whatever replace_s() returned: 0 on success, -1 on error.
 */
extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
{
    const int status = replace_s(z, 0, z->l, size, s, NULL);
    z->c = 0;
    return status;
}


+ 26
- 0
contrib/snowball/runtime/api.h View File

@@ -0,0 +1,26 @@

typedef unsigned char symbol;

/* Or replace 'char' above with 'short' for 16 bit characters.

More precisely, replace 'char' with whatever type guarantees the
character width you need. Note however that sizeof(symbol) should divide
HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
there is an alignment problem. In the unlikely event of a problem here,
consult Martin Porter.

*/

/* Working state of one stemmer instance. Allocated by SN_create_env() and
 * released by SN_close_env(). */
struct SN_env {
/* the string being processed (a buffer obtained from create_s()) */
symbol * p;
/* positions within p: c = cursor, l = forward limit, lb = backward limit,
   bra/ket = start/end of the current slice */
int c; int l; int lb; int bra; int ket;
/* string slots; S_size entries, or NULL when S_size was 0 */
symbol * * S;
/* integer slots; I_size entries, or NULL when I_size was 0 */
int * I;
/* boolean slots; B_size entries, or NULL when B_size was 0 */
unsigned char * B;
};

extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
extern void SN_close_env(struct SN_env * z, int S_size);

extern int SN_set_current(struct SN_env * z, int size, const symbol * s);


+ 58
- 0
contrib/snowball/runtime/header.h View File

@@ -0,0 +1,58 @@

#include <limits.h>

#include "api.h"

#define MAXINT INT_MAX
#define MININT INT_MIN

/* Size in bytes of the bookkeeping header that sits immediately before
 * every symbol buffer: two ints, the capacity at index -2 and the size at
 * index -1 (see CAPACITY and SIZE). Parenthesised so that the macro
 * expands safely inside any expression (e.g. `x % HEAD`). */
#define HEAD (2*sizeof(int))

/* Number of symbols currently stored in buffer p. */
#define SIZE(p) ((int *)(p))[-1]
/* Set the stored size of buffer p to n. */
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
/* Number of symbols buffer p has room for. */
#define CAPACITY(p) ((int *)(p))[-2]

/* One entry of an among table, as searched by find_among() and
 * find_among_b(). */
struct among
{ int s_size; /* number of chars in string */
const symbol * s; /* search string */
int substring_i;/* index to longest matching substring */
int result; /* result of the lookup */
/* optional condition: when non-zero it is called after the string has
   matched, and the entry is only accepted if it returns non-zero */
int (* function)(struct SN_env *);
};

extern symbol * create_s(void);
extern void lose_s(symbol * p);

extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);

extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);

extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);

extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
extern int eq_v(struct SN_env * z, const symbol * p);
extern int eq_v_b(struct SN_env * z, const symbol * p);

extern int find_among(struct SN_env * z, const struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);

extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
extern int slice_from_v(struct SN_env * z, const symbol * p);
extern int slice_del(struct SN_env * z);

extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);

extern symbol * slice_to(struct SN_env * z, symbol * p);
extern symbol * assign_to(struct SN_env * z, symbol * p);

extern void debug(struct SN_env * z, int number, int line_count);


+ 478
- 0
contrib/snowball/runtime/utilities.c View File

@@ -0,0 +1,478 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "header.h"

#define unless(C) if(!(C))

#define CREATE_SIZE 1

/* Allocate a new symbol buffer.
 *
 * The allocation holds a HEAD-byte header followed by CREATE_SIZE + 1
 * symbols; the returned pointer addresses the first symbol, with capacity
 * and size stored in the header just before it. Both are initialised to
 * CREATE_SIZE; the symbol storage itself is left uninitialised.
 *
 * Returns NULL if malloc fails. Release with lose_s().
 */
extern symbol * create_s(void) {
    symbol * s;
    char * raw = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
    if (raw == NULL) return NULL;

    s = (symbol *) (raw + HEAD);
    CAPACITY(s) = CREATE_SIZE;
    SET_SIZE(s, CREATE_SIZE);
    return s;
}

/* Free a symbol buffer obtained from create_s(). The real allocation
 * starts HEAD bytes before p. Passing NULL is a no-op. */
extern void lose_s(symbol * p) {
    if (p != NULL) {
        free((char *) p - HEAD);
    }
}

/*
new_c = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
if n +ve, or n characters backwards from p + c - 1 if n -ve. new_c is the
new cursor position (an index into p), or -1 on failure, i.e. when the
limit l (forwards) or lb (backwards) is reached before n characters have
been skipped.

A character is one lead byte plus any continuation bytes (10xxxxxx) that
follow it.

-- used to implement hop and next in the utf8 case.
*/

extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
int b;
if (n >= 0) {
for (; n > 0; n--) {
if (c >= l) return -1;
b = p[c++];
if (b >= 0xC0) { /* 1100 0000 */
/* multi-byte lead: swallow the continuation bytes that follow */
while (c < l) {
b = p[c];
if (b >= 0xC0 || b < 0x80) break;
/* break unless b is 10------ */
c++;
}
}
}
} else {
for (; n < 0; n++) {
if (c <= lb) return -1;
b = p[--c];
if (b >= 0x80) { /* 1000 0000 */
/* inside a multi-byte character: back up to its lead byte */
while (c > lb) {
b = p[c];
if (b >= 0xC0) break; /* 1100 0000 */
c--;
}
}
}
}
return c;
}

/* Code for character groupings: utf8 cases */

/* Decode the character starting at p[c] (reading no further than l) into
 * *slot, and return the number of bytes it occupies: 1, 2 or 3. Returns 0,
 * leaving *slot untouched, if c is already at or past l.
 *
 * Only sequences of up to three bytes are decoded; a lead byte of 0xE0 or
 * above is always treated as starting a three-byte sequence. If the limit
 * cuts a sequence short, the bytes seen so far are decoded as a shorter
 * sequence.
 */
static int get_utf8(const symbol * p, int c, int l, int * slot) {
int b0, b1;
if (c >= l) return 0;
b0 = p[c++];
if (b0 < 0xC0 || c == l) { /* 1100 0000 */
* slot = b0; return 1;
}
b1 = p[c++];
if (b0 < 0xE0 || c == l) { /* 1110 0000 */
* slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
}
* slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3;
}

/* Backward counterpart of get_utf8(): decode the character that ends just
 * before p[c] (reading no further back than lb) into *slot, and return the
 * number of bytes it occupies: 1, 2 or 3. Returns 0, leaving *slot
 * untouched, if c is already at or before lb.
 *
 * Here b0 is the LAST byte of the character and b1 the one before it.
 */
static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
int b0, b1;
if (c <= lb) return 0;
b0 = p[--c];
if (b0 < 0x80 || c == lb) { /* 1000 0000 */
* slot = b0; return 1;
}
b1 = p[--c];
if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */
* slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
}
* slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
}

/* UTF-8, forwards: advance the cursor over a character that belongs to the
 * grouping described by bitmap s (covering code points min..max), or over
 * a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the limit is reached, the byte width w of the offending
 * character if the character at the cursor is not in the grouping (cursor
 * left in front of it), and 0 after consuming one member when `repeat` is
 * zero.
 */
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int w = get_utf8(z->p, z->c, z->l, & ch);
        if (w == 0) return -1;
        if (ch > max) return w;
        ch -= min;
        if (ch < 0) return w;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w;
        z->c += w;
        if (!repeat) break;
    }
    return 0;
}

/* UTF-8, backwards: move the cursor back over a character that belongs to
 * the grouping described by bitmap s (covering code points min..max), or
 * over a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the backward limit is reached, the byte width w of the
 * offending character if the character before the cursor is not in the
 * grouping (cursor left after it), and 0 after consuming one member when
 * `repeat` is zero.
 */
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int w = get_b_utf8(z->p, z->c, z->lb, & ch);
        if (w == 0) return -1;
        if (ch > max) return w;
        ch -= min;
        if (ch < 0) return w;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w;
        z->c -= w;
        if (!repeat) break;
    }
    return 0;
}

/* UTF-8, forwards: advance the cursor over a character that does NOT
 * belong to the grouping described by bitmap s (covering code points
 * min..max), or over a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the limit is reached, the byte width w of the character at
 * the cursor if it IS in the grouping (cursor left in front of it), and 0
 * after consuming one non-member when `repeat` is zero.
 */
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int w = get_utf8(z->p, z->c, z->l, & ch);
        if (w == 0) return -1;
        if (ch <= max) {
            int bit = ch - min;
            if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return w;
        }
        z->c += w;
        if (!repeat) break;
    }
    return 0;
}

/* UTF-8, backwards: move the cursor back over a character that does NOT
 * belong to the grouping described by bitmap s (covering code points
 * min..max), or over a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the backward limit is reached, the byte width w of the
 * character before the cursor if it IS in the grouping (cursor left after
 * it), and 0 after consuming one non-member when `repeat` is zero.
 */
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int w = get_b_utf8(z->p, z->c, z->lb, & ch);
        if (w == 0) return -1;
        if (ch <= max) {
            int bit = ch - min;
            if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return w;
        }
        z->c -= w;
        if (!repeat) break;
    }
    return 0;
}

/* Code for character groupings: non-utf8 cases */

/* Single-byte encodings, forwards: advance the cursor over a symbol that
 * belongs to the grouping described by bitmap s (covering values
 * min..max), or over a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the limit is reached, 1 if the symbol at the cursor is not
 * in the grouping (cursor left in front of it), and 0 after consuming one
 * member when `repeat` is zero.
 */
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        if (z->c >= z->l) return -1;
        ch = z->p[z->c];
        if (ch > max) return 1;
        ch -= min;
        if (ch < 0) return 1;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1;
        z->c++;
        if (!repeat) break;
    }
    return 0;
}

/* Single-byte encodings, backwards: move the cursor back over a symbol
 * that belongs to the grouping described by bitmap s (covering values
 * min..max), or over a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the backward limit is reached, 1 if the symbol before the
 * cursor is not in the grouping (cursor left after it), and 0 after
 * consuming one member when `repeat` is zero.
 */
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        if (z->c <= z->lb) return -1;
        ch = z->p[z->c - 1];
        if (ch > max) return 1;
        ch -= min;
        if (ch < 0) return 1;
        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1;
        z->c--;
        if (!repeat) break;
    }
    return 0;
}

/* Single-byte encodings, forwards: advance the cursor over a symbol that
 * does NOT belong to the grouping described by bitmap s (covering values
 * min..max), or over a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the limit is reached, 1 if the symbol at the cursor IS in
 * the grouping (cursor left in front of it), and 0 after consuming one
 * non-member when `repeat` is zero.
 */
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        if (z->c >= z->l) return -1;
        ch = z->p[z->c];
        if (ch <= max) {
            int bit = ch - min;
            if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 1;
        }
        z->c++;
        if (!repeat) break;
    }
    return 0;
}

/* Single-byte encodings, backwards: move the cursor back over a symbol
 * that does NOT belong to the grouping described by bitmap s (covering
 * values min..max), or over a run of them when `repeat` is non-zero.
 *
 * Returns -1 if the backward limit is reached, 1 if the symbol before the
 * cursor IS in the grouping (cursor left after it), and 0 after consuming
 * one non-member when `repeat` is zero.
 */
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        if (z->c <= z->lb) return -1;
        ch = z->p[z->c - 1];
        if (ch <= max) {
            int bit = ch - min;
            if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 1;
        }
        z->c--;
        if (!repeat) break;
    }
    return 0;
}

/* Forward literal match: if the s_size symbols at the cursor equal those
 * at s, advance the cursor past them and return 1; otherwise return 0 and
 * leave the cursor alone. */
extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
    if (z->l - z->c < s_size) return 0;
    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
    z->c += s_size;
    return 1;
}

/* Backward literal match: if the s_size symbols ending at the cursor equal
 * those at s, move the cursor back before them and return 1; otherwise
 * return 0 and leave the cursor alone. */
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
    if (z->c - z->lb < s_size) return 0;
    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
    z->c -= s_size;
    return 1;
}

/* Forward match against the whole of symbol buffer p (its stored SIZE);
 * see eq_s() for the return value. */
extern int eq_v(struct SN_env * z, const symbol * p) {
return eq_s(z, SIZE(p), p);
}

/* Backward match against the whole of symbol buffer p (its stored SIZE);
 * see eq_s_b() for the return value. */
extern int eq_v_b(struct SN_env * z, const symbol * p) {
return eq_s_b(z, SIZE(p), p);
}

/* Forward lookup of the text at the cursor in the among table v, which has
 * v_size entries (presumably sorted by search string -- the tables are
 * generated elsewhere; confirm against the generator).
 *
 * A binary search narrows down to a candidate entry; the candidate and
 * then its substring_i chain are tried in turn. An entry is accepted when
 * it matched in full and either has no function or its function returns
 * non-zero. On acceptance the cursor is left just past the matched string
 * and the entry's result is returned. Returns 0 if nothing is accepted.
 */
extern int find_among(struct SN_env * z, const struct among * v, int v_size) {

int i = 0;
int j = v_size;

int c = z->c; int l = z->l;
symbol * q = z->p + c;

const struct among * w;

/* number of leading symbols already known to match at the i and j ends */
int common_i = 0;
int common_j = 0;

int first_key_inspected = 0;

while(1) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; /* smaller */
w = v + k;
{
int i2; for (i2 = common; i2 < w->s_size; i2++) {
/* hitting the limit counts as the text being smaller than w->s */
if (c + common == l) { diff = -1; break; }
diff = q[common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
if (i > 0) break; /* v->s has been inspected */
if (j == i) break; /* only one item in v */

/* - but now we need to go round once more to get
v->s inspected. This looks messy, but is actually
the optimal approach. */

if (first_key_inspected) break;
first_key_inspected = 1;
}
}
/* walk from the candidate through ever shorter substring entries */
while(1) {
w = v + i;
if (common_i >= w->s_size) {
z->c = c + w->s_size;
if (w->function == 0) return w->result;
{
int res = w->function(z);
/* the function may have moved the cursor; put it back */
z->c = c + w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
}

/* find_among_b is for backwards processing. Same comments apply:
 * strings are compared from their last symbol towards their first against
 * the text running backwards from the cursor down to z->lb, and on
 * acceptance the cursor is left just before the matched string. */

extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {

int i = 0;
int j = v_size;

int c = z->c; int lb = z->lb;
/* q addresses the symbol just before the cursor */
symbol * q = z->p + c - 1;

const struct among * w;

int common_i = 0;
int common_j = 0;

int first_key_inspected = 0;

while(1) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
w = v + k;
{
int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
/* hitting the backward limit counts as the text being smaller */
if (c - common == lb) { diff = -1; break; }
diff = q[- common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
/* walk from the candidate through ever shorter substring entries */
while(1) {
w = v + i;
if (common_i >= w->s_size) {
z->c = c - w->s_size;
if (w->function == 0) return w->result;
{
int res = w->function(z);
/* the function may have moved the cursor; put it back */
z->c = c - w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
}


/* Grow the buffer pointed to by p so that it can hold at least n symbols
 * (20 symbols of slack are added on top of n).
 * The stored SIZE is carried over by realloc; only CAPACITY is updated.
 * Returns the possibly relocated buffer. If memory is insufficient,
 * returns NULL and frees the old buffer.
 */
static symbol * increase_size(symbol * p, int n) {
    const int capacity = n + 20;
    symbol * grown;
    char * base = (char *) realloc((char *) p - HEAD,
                                   HEAD + (capacity + 1) * sizeof(symbol));
    if (base == NULL) {
        lose_s(p);
        return NULL;
    }
    grown = (symbol *) (base + HEAD);
    CAPACITY(grown) = capacity;
    return grown;
}

/* to replace symbols between c_bra and c_ket in z->p by the
s_size symbols at s.
Returns 0 on success, -1 on error.
Also, frees z->p (and sets it to NULL) on error.

If z->p is NULL on entry a fresh buffer is created first.
The limit z->l is shifted by the change in length; a cursor at or beyond
c_ket is shifted likewise, and a cursor strictly inside the replaced
region is moved to c_bra.
If adjptr is not NULL, the change in length is stored through it.
*/
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
{
int adjustment;
int len;
if (z->p == NULL) {
z->p = create_s();
if (z->p == NULL) return -1;
}
adjustment = s_size - (c_ket - c_bra);
len = SIZE(z->p);
if (adjustment != 0) {
if (adjustment + len > CAPACITY(z->p)) {
/* increase_size() frees the old buffer itself when it fails */
z->p = increase_size(z->p, adjustment + len);
if (z->p == NULL) return -1;
}
/* shift the tail (everything from c_ket on) to its new position */
memmove(z->p + c_ket + adjustment,
z->p + c_ket,
(len - c_ket) * sizeof(symbol));
SET_SIZE(z->p, adjustment + len);
z->l += adjustment;
if (z->c >= c_ket)
z->c += adjustment;
else
if (z->c > c_bra)
z->c = c_bra;
}
/* finally drop the replacement text into the gap */
unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
if (adjptr != NULL)
*adjptr = adjustment;
return 0;
}

/* Validate the slice markers before a slice operation: requires a
 * non-NULL z->p and 0 <= bra <= ket <= l <= SIZE(z->p).
 * Returns 0 if the slice is usable, -1 otherwise. The diagnostic output is
 * compiled out. */
static int slice_check(struct SN_env * z) {

if (z->bra < 0 ||
z->bra > z->ket ||
z->ket > z->l ||
z->p == NULL ||
z->l > SIZE(z->p)) /* this line could be removed */
{
#if 0
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
#endif
return -1;
}
return 0;
}

/* Replace the current slice [bra, ket) with the s_size symbols at s.
 * Returns -1 if the slice markers are invalid, otherwise the result of
 * replace_s() (0 on success, -1 on error). */
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
    if (slice_check(z) != 0) {
        return -1;
    }
    return replace_s(z, z->bra, z->ket, s_size, s, NULL);
}

/* Replace the current slice with the whole of symbol buffer p (its stored
 * SIZE); see slice_from_s() for the return value. */
extern int slice_from_v(struct SN_env * z, const symbol * p) {
return slice_from_s(z, SIZE(p), p);
}

/* Delete the current slice by replacing it with zero symbols; see
 * slice_from_s() for the return value. */
extern int slice_del(struct SN_env * z) {
return slice_from_s(z, 0, 0);
}

/* Replace [bra, ket) in z->p with the s_size symbols at s and keep the
 * slice markers consistent: z->bra and z->ket are each shifted by the
 * length change when they lie at or after `bra`.
 * Returns 0 on success, -1 if replace_s() failed. */
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
    int delta;
    if (replace_s(z, bra, ket, s_size, s, &delta) != 0) {
        return -1;
    }
    if (z->bra >= bra) z->bra += delta;
    if (z->ket >= bra) z->ket += delta;
    return 0;
}

/* Same as insert_s(), inserting the whole of symbol buffer p (its stored
 * SIZE). Returns 0 on success, -1 on error. */
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
    return insert_s(z, bra, ket, SIZE(p), p);
}

/* Copy the current slice [bra, ket) of z->p into symbol buffer p, growing
 * p if needed, and set p's SIZE to the slice length.
 *
 * Returns the (possibly relocated) buffer, which the caller must use in
 * place of p. Returns NULL, with p already freed, if the slice markers are
 * invalid or memory runs out.
 */
extern symbol * slice_to(struct SN_env * z, symbol * p) {
    int n;
    if (slice_check(z) != 0) {
        lose_s(p);
        return NULL;
    }
    n = z->ket - z->bra;
    if (n > CAPACITY(p)) {
        p = increase_size(p, n);
        if (p == NULL) return NULL;
    }
    memmove(p, z->p + z->bra, n * sizeof(symbol));
    SET_SIZE(p, n);
    return p;
}

/* Copy the first z->l symbols of z->p into symbol buffer p, growing p if
 * needed, and set p's SIZE accordingly.
 *
 * Returns the (possibly relocated) buffer, which the caller must use in
 * place of p, or NULL (with p already freed) if memory runs out.
 */
extern symbol * assign_to(struct SN_env * z, symbol * p) {
    const int n = z->l;
    if (n > CAPACITY(p)) {
        p = increase_size(p, n);
        if (p == NULL) return NULL;
    }
    memmove(p, z->p, n * sizeof(symbol));
    SET_SIZE(p, n);
    return p;
}

#if 0
/* Debugging aid (compiled out): print the contents of z->p on one line
 * with the positions marked inline -- '{' at lb, '[' at bra, '|' at the
 * cursor, ']' at ket and '}' at l. Zero symbols are shown as '#'.
 * When number >= 0 the line is prefixed with number, line_count and the
 * buffer size. */
extern void debug(struct SN_env * z, int number, int line_count) {
int i;
int limit = SIZE(z->p);
/*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++) {
if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
}
printf("'\n");
}
#endif

+ 0
- 1
doc/doxydown

@@ -1 +0,0 @@
Subproject commit 6c1f79c4294ef66a55bfc75845f3fdf3e2e1c32d

+ 19
- 0
doc/doxydown/.gitignore View File

@@ -0,0 +1,19 @@
/blib/
/.build/
_build/
cover_db/
inc/
Build
!Build/
Build.bat
.last_cover_stats
/Makefile
/Makefile.old
/MANIFEST.bak
/META.yml
/META.json
/MYMETA.*
nytprof.out
/pm_to_blib
*.o
*.bs

+ 21
- 0
doc/doxydown/LICENSE View File

@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2014 Vsevolod Stakhov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

+ 139
- 0
doc/doxydown/README.md View File

@@ -0,0 +1,139 @@
# Doxydown - documentation utility

## Introduction

Doxydown is a utility that converts `doxygen`-like comments in source code to markdown.
Unlike other documentation systems, `doxydown` is specifically designed to generate markdown output only.
At the moment, doxydown can work with C and lua comments and produce kramdown/pandoc or github
flavoured markdown. Doxydown produces output with anchors, links and table of content.
It also can highlight syntax for examples in the documentation.

### Why markdown

Markdown is used by many contemporary engines and can be rendered to HTML using
advanced templates, styles and scripts. Markdown provides excellent formatting
capabilities while it doesn't require authors to be web designers to create
documentation. Markdown is rendered by [`github`](https://github.com) and
doxydown can generate documentation easily viewed directly inside github. Moreover,
doxydown supports pandoc style of markdown and that means that markdown output
can be converted to all formats supported by pandoc (html, pdf, latex,
man pages and many others).

### Why not `other documentation generator`

Doxydown is extremely simple, as it can output markdown only, but it is a very
convenient tool to generate nice markdown with all the features required from a
documentation system. Doxydown uses an input format that is very close to `doxygen`,
which allows re-using existing documentation comments. Currently, doxydown
does not support many features, but they could easily be added on demand.

## Input format

Doxydown extracts documentation from the comments blocks. The start of block is indicated by

/***

in `C` or by

--[[[

in `lua`. The end of documentation block is the normal multiline comment ending
specific for the input language. Doxydown also strips an initial comment character,
therefore the following inputs are equal:

~~~c
/***
* some text
* other text
*
*/
~~~
and

~~~c
/***
some text
other text

*/
~~~

Note that doxydown preserves empty lines and all markdown elements.

### Documentation blocks

Each documentation block describes either module or function/method. Modules are
logical compounds of functions and methods. The difference between method and
function is significant for languages with methods support (e.g. by `lua` via
metatables). To define method or function you can use the following:

/***
@function my_awesome_function(param1[, param2])
This function is awesome.
*/

All text met in the current documentation block is used as function or method description.
You can also define parameters and return values for functions and methods:

@param {type} param1 mandatory param

Here, `{type}` is optional type description for a parameter, `param1` is parameter's name
and the rest of the string is parameter description. Currently, you cannot split
parameter description by newline character. In future versions of doxydown this might
be fixed.

You can specify the return type of your function with the `@return` tag:

@return {type} some cool result
This tag is similar to `@param` and has the same limitation regarding newlines.
You can also add some example code by using of `@example` tag:

@example
my_awesome_function('hello'); // returns 42

All text after `@example` tag and until documentation block end is used as an example
and is highlighted in markdown. Also you can switch the language of example by using
the extended `@example` tag:

@example lua

In this example, the code will be highlighted as `lua` code.

Module descriptions use the same conventions, but `@param` and `@return` are
meaningless for modules. Function and method blocks that follow a `@module`
block are automatically attached to that module.

Both modules and functions can link to other functions and methods by using the
`@see` tag:

@see my_awesome_function

This inserts a hyperlink to the specified function definition to the markdown.

## Output format

Doxydown can generate github flavoured markdown and pandoc/kramdown compatible
markdown. The main difference is in how anchors are organized. In kramdown and
pandoc it is possible to specify an explicit id for each header, whilst in
GH flavoured markdown we can use only implicit anchors.

### Examples

You can see an example of github flavoured markdown render at
[libucl github page](https://github.com/vstakhov/libucl/blob/master/doc/lua_api.md).
The same page, but rendered by the kramdown engine on the `jekyll` platform, can be
accessed at [this address](https://rspamd.com/doc/lua/ucl.html).

## Program invocation

doxydown [-hg] [-l language] < input_source > markdown.md

* `-h`: help message
* `-e`: sets default example language (default: lua)
* `-l`: sets input language (default: c)
* `-g`: use github flavoured markdown (default: kramdown/pandoc)

## License

Doxydown is published under the terms of the `MIT` license.

+ 388
- 0
doc/doxydown/doxydown.pl View File

@@ -0,0 +1,388 @@
#!/usr/bin/env perl

# Script version. Assigned before `use strict` below, so the package
# variable needs no explicit declaration at this point.
$VERSION = "0.1";

use strict;
use warnings;
use Data::Dumper;
use Digest::MD5 qw(md5_hex);

# Every module record created by parse_module, in the order encountered.
my @modules;
# Command line switches, filled in by getopts().
my %options = ();
# Module record that parse_function attaches functions and methods to.
my $cur_module;
# Highlighting language used for @example blocks unless overridden.
my $example_language = "lua";

# Comment syntax per supported input language:
#   start  - matches the line that opens a documentation block; for C the
#            optional capture holds text following the opening marker
#   end    - matches the line that closes the block
#   filter - applied to each line inside a block; the capture is the line
#            text without the leading comment decoration
my %languages = (
c => {
start => qr/^\s*\/\*\*\*(?:\s*|(\s+\S.+\s*))$/,
end => qr/^\s*\*+\/\s*$/,
filter => qr/^(?:\s*\*+\s?)?(\s*[^*].+)\s*$/,
},
lua => {
start => qr/^\s*\--\[\[\[\s*$/,
end => qr/^\s*--\]\]\s*/,
filter => qr/^(?:\s*--\s)?(\s*\S.+)\s*$/,
},
);

# Line declaring a function or method; captures the tag and the signature.
my $function_re = qr/^\s*\@(function|fn|method)\s*(\S.+)$/oi;
# Line declaring a module (or file); captures the module name.
my $module_re = qr/^\s*\@(?:module|file)\s*(\S.+)$/oi;

# Entry of %languages selected for the current run.
my $language;

# Print the markdown section that introduces one module.
#
# $mname - module name shown in the header
# $m     - module record (id, data, example, example_language,
#          functions, methods)
#
# Output: the module header and description, an optional example block
# and a "Brief content" index linking to each function and method.
sub print_module_markdown {
    my ( $mname, $m ) = @_;

    # An explicit header id is only written when -g is not given.
    my $idline = "";
    $idline = " {#$m->{'id'}}" unless $options{g};
    print <<EOD;
## Module `$mname`$idline

$m->{'data'}
EOD
    if ( $m->{'example'} ) {
        print <<EOD;

Example:

~~~$m->{'example_language'}
$m->{'example'}
~~~
EOD
    }

    # One index line per function/method: a link to its anchor, followed
    # by the brief description when there is one.
    sub print_func {
        my ($f) = @_;

        my ( $name, $id ) = ( $f->{'name'}, $f->{'id'} );
        my $entry = "> [`$name`](#$id)";
        $entry .= ": " . $f->{'brief'} if $f->{'brief'};
        print $entry . "\n\n";
    }

    print "\n### Brief content:\n\n";
    if ( @{ $m->{'functions'} } ) {
        print "**Functions**:\n\n";
        print_func($_) for @{ $m->{'functions'} };
    }
    if ( @{ $m->{'methods'} } ) {
        print "\n\n**Methods**:\n\n";
        print_func($_) for @{ $m->{'methods'} };
    }
}

# Print the markdown section describing a single function or method.
#
# $type  - heading word, e.g. "Function" or "Method"
# $fname - name/signature shown in the heading
# $f     - function record (id, data, params, return, example,
#          example_language)
#
# Output: heading, description, a "Parameters" list, a "Returns" entry
# and, when present, a fenced example block.
sub print_function_markdown {
    my ( $type, $fname, $f ) = @_;

    # An explicit header id is only written when -g is not given.
    my $idline = $options{g} ? "" : " {#$f->{'id'}}";
    print <<EOD;
### $type `$fname`$idline

$f->{'data'}
EOD
    print "\n**Parameters:**\n\n";
    if ( $f->{'params'} && scalar @{ $f->{'params'} } > 0 ) {
        foreach my $p ( @{ $f->{'params'} } ) {

            # A @param line may carry no description at all.
            my $desc = $p->{'description'} // '';
            if ( $p->{'type'} ) {
                print "- `$p->{'name'} \{$p->{'type'}\}`: $desc\n";
            }
            else {
                print "- `$p->{'name'}`: $desc\n";
            }
        }
    }
    else {
        print "\tnothing\n";
    }
    print "\n**Returns:**\n\n";

    # Use a lexical here: assigning to the global $_ would overwrite
    # whatever the caller's foreach loop currently has aliased to it.
    my $ret = $f->{'return'};
    if ( $ret && $ret->{'description'} ) {
        if ( $ret->{'type'} ) {
            print "- `\{$ret->{'type'}\}`: $ret->{'description'}\n";
        }
        else {
            print "- $ret->{'description'}\n";
        }
    }
    else {
        print "\tnothing\n";
    }
    if ( $f->{'example'} ) {
        print <<EOD;

Example:

~~~$f->{'example_language'}
$f->{'example'}
~~~
EOD
    }
}

# Print the complete markdown document for every parsed module: the
# module section, then its functions, then its methods, each followed by
# a link back to the module description.
sub print_markdown {
    for my $m (@modules) {
        my $mname = $m->{name};
        print_module_markdown( $mname, $m );

        # Lexical loop variables are used on purpose: with the implicit
        # $_ the array element is aliased, and any callee assigning to $_
        # would silently replace the record stored in the module.
        if ( scalar( @{ $m->{'functions'} } ) > 0 ) {
            print
"\n## Functions\n\nThe module `$mname` defines the following functions.\n\n";
            foreach my $fn ( @{ $m->{'functions'} } ) {
                print_function_markdown( "Function", $fn->{'name'}, $fn );
                print "\nBack to [module description](#$m->{'id'}).\n\n";
            }
        }

        if ( scalar( @{ $m->{'methods'} } ) > 0 ) {
            print
"\n## Methods\n\nThe module `$mname` defines the following methods.\n\n";
            foreach my $method ( @{ $m->{'methods'} } ) {
                print_function_markdown( "Method", $method->{'name'}, $method );
                print "\nBack to [module description](#$m->{'id'}).\n\n";
            }
        }

        print "\nBack to [top](#).\n\n";
    }
}

# Build the anchor id for a module, function or method.
#
# $name   - name (or full signature) of the documented entity
# $prefix - kind of entity, e.g. "module" or "method"; defaults to "f"
#
# With -g the id is the lower-cased "prefix-name" with whitespace runs
# turned into '-' and every character other than word characters, '_'
# and '-' removed.  Otherwise it is a six character id: the first letter
# of the prefix followed by the start of the MD5 hex digest of the first
# whitespace-delimited token of the name.
sub make_id {
    my ( $name, $prefix ) = @_;

    $prefix = "f" unless $prefix;

    if ( $options{g} ) {
        my $lowered = lc( $prefix . "-" . $name );
        my $id = join '-', split( /\s+/, $lowered );
        $id =~ s/[^\w_-]+//g;
        return $id;
    }

    # Kramdown/pandoc version of ID's
    $name =~ /^(\S+).*$/;
    my $digest = md5_hex($1);
    return substr( substr( $prefix, 0, 1 ) . $digest, 0, 6 );
}

# Replace the first `@see name` reference in a description line with a
# markdown link to that name's anchor (as computed by make_id with its
# default prefix).  Lines without an `@see` tag are returned unchanged.
sub substitute_data_keywords {
    my ($line) = @_;

    return $line unless $line =~ /^.*\@see\s+(\S+)\s*.*$/;

    # Copy the capture before calling make_id, which runs its own regex.
    my $target = $1;
    my $anchor = make_id($target);

    ( my $linked = $line ) =~ s/\@see\s+\S+/[`$target`](#$anchor)/;
    return $linked;
}

# Parse one documentation block that declares a function or method.
#
# $func - the line of the block that matched $function_re
#         (`@function`, `@fn` or `@method` followed by the signature)
# @data - every line of the block, including $func itself
#
# Builds a record holding name, id, description (`data`), and the
# optional `brief`, `params`, `return` and `example` entries, then
# appends it to the `methods` or `functions` list of $cur_module.
#
# NOTE(review): when no @module block has been parsed yet, $cur_module is
# autovivified by the final push and is never added to @modules, so such
# a record does not appear in the output.
sub parse_function {
    my ( $func, @data ) = @_;

    my ( $type, $name ) = ( $func =~ $function_re );

    chomp $name;

    my $f = {
        name             => $name,
        data             => '',
        example          => undef,
        example_language => $example_language,
        id               => make_id( $name, $type ),
    };

    # Set once an @example tag is seen; all later lines belong to it.
    my $example = 0;

    foreach (@data) {
        if (/^\s*\@param\s*(?:\{([^}]+)\})?\s*(\S+)\s*(.+)?\s*$/) {
            my $p = { name => $2, type => $1, description => $3 };
            push @{ $f->{'params'} }, $p;
        }
        elsif (/^\s*\@return\s*(?:\{([^}]+)\})?\s*(.+)?\s*$/) {
            my $r = { type => $1, description => $2 };
            $f->{'return'} = $r;
        }
        elsif (/^\s*\@brief\s*(\S.+)$/) {
            $f->{'brief'} = $1;
        }
        # The language is a whole word (`@example lua`); capturing a
        # single character would make the tag line fail to match at all.
        elsif (/^\s*\@example\s*(\S+)?\s*$/) {
            $example = 1;
            if ($1) {
                $f->{'example_language'} = $1;
            }
        }
        elsif ( $_ ne $func ) {
            if ($example) {
                $f->{'example'} .= $_;
            }
            else {
                $f->{'data'} .= substitute_data_keywords($_);
            }
        }
    }

    # Without a long description fall back to the brief one.
    if ( $f->{'data'} ) {
        chomp $f->{'data'};
    }
    elsif ( $f->{'brief'} ) {
        chomp $f->{'brief'};
        $f->{'data'} = $f->{'brief'};
    }
    if ( $f->{'example'} ) {
        chomp $f->{'example'};
    }

    # $function_re matches case-insensitively, so compare likewise.
    if ( lc($type) eq "method" ) {
        push @{ $cur_module->{'methods'} }, $f;
    }
    else {
        push @{ $cur_module->{'functions'} }, $f;
    }
}

# Parse one documentation block that declares a module.
#
# $module - the line of the block that matched $module_re
#           (`@module` or `@file` followed by the name)
# @data   - every line of the block, including $module itself
#
# Builds a module record (name, id, description, optional brief and
# example, empty function/method lists), makes it the current module for
# subsequently parsed functions and appends it to @modules.
sub parse_module {
    my ( $module, @data ) = @_;
    my ($name) = ( $module =~ $module_re );

    chomp $name;

    my $f = {
        name             => $name,
        functions        => [],
        methods          => [],
        data             => '',
        example          => undef,
        example_language => $example_language,
        id               => make_id( $name, "module" ),
    };

    # Set once an @example tag is seen; all later lines belong to it.
    my $example = 0;

    foreach (@data) {
        # The language is a whole word (`@example lua`); capturing a
        # single character would make the tag line fail to match at all.
        if (/^\s*\@example\s*(\S+)?\s*$/) {
            $example = 1;
            if ($1) {
                $f->{'example_language'} = $1;
            }
        }
        elsif (/^\s*\@brief\s*(\S.+)$/) {
            $f->{'brief'} = $1;
        }
        elsif ( $_ ne $module ) {
            if ($example) {
                $f->{'example'} .= $_;
            }
            else {
                $f->{'data'} .= substitute_data_keywords($_);
            }
        }
    }

    # Without a long description fall back to the brief one.
    if ( $f->{'data'} ) {
        chomp $f->{'data'};
    }
    elsif ( $f->{'brief'} ) {
        chomp $f->{'brief'};
        $f->{'data'} = $f->{'brief'};
    }
    if ( $f->{'example'} ) {
        chomp $f->{'example'};
    }
    $cur_module = $f;
    push @modules, $f;
}

# Dispatch the lines of one documentation block: if any line declares a
# function/method the block is handed to parse_function, and if any line
# declares a module it is handed to parse_module.  Only the first
# matching line of each kind is used as the declaration.
sub parse_content {
    my ($func_line) = grep { $_ =~ $function_re } @_;
    parse_function( $func_line, @_ ) if defined $func_line;

    my ($module_line) = grep { $_ =~ $module_re } @_;
    parse_module( $module_line, @_ ) if defined $module_line;
}

# Print the usage summary to STDERR and terminate the program.
# The usage line lists every switch accepted by the getopts() call,
# including -e with its argument.
sub HELP_MESSAGE {
    print STDERR <<EOF;
Utility to convert doxygen comments to markdown.

usage: $0 [-hg] [-e language] [-l language] < input_source > markdown.md

-h : this (help) message
-e : sets default example language (default: lua)
-l : sets input language (default: c)
-g : use github flavoured markdown (default: kramdown/pandoc)
EOF
    exit;
}

# Command line handling: -h help, -e example language, -g github
# flavoured output, -l input language.
$Getopt::Std::STANDARD_HELP_VERSION = 1;
use Getopt::Std;
getopts( 'he:gl:', \%options );

HELP_MESSAGE() if $options{h};

if ( $options{e} ) {
    $example_language = $options{e};
}
if ( $options{l} ) {
    $language = $languages{ lc $options{l} };
}

# No -l switch, or a language missing from %languages: use C.
$language = $languages{c} unless $language;

# Reader states; only SKIP and CONTENT are used by the loop below.
use constant {
    STATE_READ_SKIP    => 0,
    STATE_READ_CONTENT => 1,
    STATE_READ_ENUM    => 2,
    STATE_READ_STRUCT  => 3,
};

my $state = STATE_READ_SKIP;
my $content;

# Scan the input line by line, collecting the text of each documentation
# block and handing it to parse_content when the block ends.
while (<>) {
    if ( $state == STATE_READ_SKIP ) {

        # Outside a block only the start marker matters.
        next unless $_ =~ $language->{start};

        $state = STATE_READ_CONTENT;
        if ( defined($1) ) {

            # Text on the same line as the start marker opens the block.
            chomp( $content = $1 );
            $content =~ tr/\r//d;
            $content .= "\n";
        }
        else {
            $content = "";
        }
        next;
    }

    next unless $state == STATE_READ_CONTENT;

    if ( $_ =~ $language->{end} ) {
        $state = STATE_READ_SKIP;
        parse_content( split /^/, $content );
        $content = "";
        next;
    }

    my ($line) = ( $_ =~ $language->{filter} );
    if ($line) {
        $line =~ tr/\r//d;
        $content .= $line . "\n";
    }
    else {
        # Preserve empty lines
        $content .= "\n";
    }
}

#print Dumper( \@modules );
print_markdown;

+ 0
- 1
interface

@@ -1 +0,0 @@
Subproject commit db96fa946f8c79cf3d25d44efce1426496a6ee79

+ 50
- 0
interface/README.md View File

@@ -0,0 +1,50 @@
# Rspamd web interface

## Overview.

This is a simple control interface for the rspamd spam filtering system.
It provides basic functions for setting metric actions and scores,
viewing statistics and learning.

<img src="https://rspamd.com/img/webui.png" class="img-responsive" alt="Webui screenshot">

## Rspamd setup.

It is required to configure dynamic settings to store configured values.
Basically this can be done by providing the following line in options settings:

~~~nginx
options {
dynamic_conf = "/var/lib/rspamd/rspamd_dynamic";
}
~~~

Please note that this path must be writable by the rspamd user.

Then controller worker should be configured:

~~~nginx
worker {
type = "controller";
bind_socket = "localhost:11334";
count = 1;
# Password for normal commands
password = "q1";
# Password for privileged commands
enable_password = "q2";
# Path to webui static files
static_dir = "${WWWDIR}";
}
~~~

The passwords shown above must be changed for your specific configuration. Using encrypted passwords is encouraged (`rspamadm pw --encrypt`).

## Interface setup.

Interface itself is written in pure HTML5/js and, hence, it requires zero setup.
Just enter a password for webui access and you are ready.

## Contact information.

Rspamd interface is distributed under the terms of [MIT license](http://opensource.org/licenses/MIT). For all questions related to this
product please see the [support page](https://rspamd.com/support.html)

+ 24
- 0
interface/css/datatables.min.css View File

@@ -0,0 +1,24 @@
/*
* This combined file was created by the DataTables downloader builder:
* https://datatables.net/download
*
* To rebuild or modify this file with the latest versions of the included
* software please visit:
* https://datatables.net/download/#bs-3.3.5/jqc-1.11.3,dt-1.10.9,fh-3.0.0,r-1.0.7,sc-1.3.0
*
* Included libraries:
* jQuery compat 1.11.3, Bootstrap 3.3.5, DataTables 1.10.9, FixedHeader 3.0.0, Responsive 1.0.7, Scroller 1.3.0
*/

table.dataTable{clear:both;margin-top:6px !important;margin-bottom:6px !important;max-width:none !important}table.dataTable td,table.dataTable th{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box}table.dataTable td.dataTables_empty,table.dataTable th.dataTables_empty{text-align:center}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}div.dataTables_wrapper div.dataTables_length label{font-weight:normal;text-align:left;white-space:nowrap}div.dataTables_wrapper div.dataTables_length select{width:75px;display:inline-block}div.dataTables_wrapper div.dataTables_filter{text-align:right}div.dataTables_wrapper div.dataTables_filter label{font-weight:normal;white-space:nowrap;text-align:left}div.dataTables_wrapper div.dataTables_filter input{margin-left:0.5em;display:inline-block;width:auto}div.dataTables_wrapper div.dataTables_info{padding-top:8px;white-space:nowrap}div.dataTables_wrapper div.dataTables_paginate{margin:0;white-space:nowrap;text-align:right}div.dataTables_wrapper div.dataTables_paginate ul.pagination{margin:2px 0;white-space:nowrap}table.dataTable thead>tr>th.sorting_asc,table.dataTable thead>tr>th.sorting_desc,table.dataTable thead>tr>th.sorting,table.dataTable thead>tr>td.sorting_asc,table.dataTable thead>tr>td.sorting_desc,table.dataTable thead>tr>td.sorting{padding-right:30px}table.dataTable thead>tr>th:active,table.dataTable thead>tr>td:active{outline:none}table.dataTable thead .sorting,table.dataTable thead .sorting_asc,table.dataTable thead .sorting_desc,table.dataTable thead .sorting_asc_disabled,table.dataTable thead .sorting_desc_disabled{cursor:pointer;position:relative}table.dataTable thead .sorting:after,table.dataTable thead .sorting_asc:after,table.dataTable thead .sorting_desc:after,table.dataTable thead .sorting_asc_disabled:after,table.dataTable thead .sorting_desc_disabled:after{position:absolute;bottom:8px;right:8px;display:block;font-family:'Glyphicons 
Halflings';opacity:0.5}table.dataTable thead .sorting:after{opacity:0.2;content:"\e150"}table.dataTable thead .sorting_asc:after{content:"\e155"}table.dataTable thead .sorting_desc:after{content:"\e156"}table.dataTable thead .sorting_asc_disabled:after,table.dataTable thead .sorting_desc_disabled:after{color:#eee}div.dataTables_scrollHead table.dataTable{margin-bottom:0 !important}div.dataTables_scrollBody table{border-top:none;margin-top:0 !important;margin-bottom:0 !important}div.dataTables_scrollBody table thead .sorting:after,div.dataTables_scrollBody table thead .sorting_asc:after,div.dataTables_scrollBody table thead .sorting_desc:after{display:none}div.dataTables_scrollBody table tbody tr:first-child th,div.dataTables_scrollBody table tbody tr:first-child td{border-top:none}div.dataTables_scrollFoot table{margin-top:0 !important;border-top:none}@media screen and (max-width: 767px){div.dataTables_wrapper div.dataTables_length,div.dataTables_wrapper div.dataTables_filter,div.dataTables_wrapper div.dataTables_info,div.dataTables_wrapper div.dataTables_paginate{text-align:center}}table.dataTable.table-condensed>thead>tr>th{padding-right:20px}table.dataTable.table-condensed .sorting:after,table.dataTable.table-condensed .sorting_asc:after,table.dataTable.table-condensed .sorting_desc:after{top:6px;right:6px}table.table-bordered.dataTable{border-collapse:separate !important}table.table-bordered.dataTable th,table.table-bordered.dataTable td{border-left-width:0}table.table-bordered.dataTable th:last-child,table.table-bordered.dataTable th:last-child,table.table-bordered.dataTable td:last-child,table.table-bordered.dataTable td:last-child{border-right-width:0}table.table-bordered.dataTable tbody th,table.table-bordered.dataTable tbody td{border-bottom-width:0}div.dataTables_scrollHead table.table-bordered{border-bottom-width:0}


table.dataTable.fixedHeader-floating,table.dataTable.fixedHeader-locked{background-color:white;margin-top:0 !important;margin-bottom:0 !important}table.dataTable.fixedHeader-floating{position:fixed}table.dataTable.fixedHeader-locked{position:absolute}


table.dataTable.dtr-inline.collapsed>tbody>tr>td:first-child,table.dataTable.dtr-inline.collapsed>tbody>tr>th:first-child{position:relative;padding-left:30px;cursor:pointer}table.dataTable.dtr-inline.collapsed>tbody>tr>td:first-child:before,table.dataTable.dtr-inline.collapsed>tbody>tr>th:first-child:before{top:8px;left:4px;height:16px;width:16px;display:block;position:absolute;color:white;border:2px solid white;border-radius:16px;text-align:center;line-height:14px;box-shadow:0 0 3px #444;box-sizing:content-box;content:'+';background-color:#337ab7}table.dataTable.dtr-inline.collapsed>tbody>tr>td:first-child.dataTables_empty:before,table.dataTable.dtr-inline.collapsed>tbody>tr>th:first-child.dataTables_empty:before{display:none}table.dataTable.dtr-inline.collapsed>tbody>tr.parent>td:first-child:before,table.dataTable.dtr-inline.collapsed>tbody>tr.parent>th:first-child:before{content:'-';background-color:#d33333}table.dataTable.dtr-inline.collapsed>tbody>tr.child td:before{display:none}table.dataTable.dtr-inline.collapsed.compact>tbody>tr>td:first-child,table.dataTable.dtr-inline.collapsed.compact>tbody>tr>th:first-child{padding-left:27px}table.dataTable.dtr-inline.collapsed.compact>tbody>tr>td:first-child:before,table.dataTable.dtr-inline.collapsed.compact>tbody>tr>th:first-child:before{top:5px;left:4px;height:14px;width:14px;border-radius:14px;line-height:12px}table.dataTable.dtr-column>tbody>tr>td.control,table.dataTable.dtr-column>tbody>tr>th.control{position:relative;cursor:pointer}table.dataTable.dtr-column>tbody>tr>td.control:before,table.dataTable.dtr-column>tbody>tr>th.control:before{top:50%;left:50%;height:16px;width:16px;margin-top:-10px;margin-left:-10px;display:block;position:absolute;color:white;border:2px solid white;border-radius:16px;text-align:center;line-height:14px;box-shadow:0 0 3px #444;box-sizing:content-box;content:'+';background-color:#337ab7}table.dataTable.dtr-column>tbody>tr.parent 
td.control:before,table.dataTable.dtr-column>tbody>tr.parent th.control:before{content:'-';background-color:#d33333}table.dataTable>tbody>tr.child{padding:0.5em 1em}table.dataTable>tbody>tr.child:hover{background:transparent !important}table.dataTable>tbody>tr.child ul{display:inline-block;list-style-type:none;margin:0;padding:0}table.dataTable>tbody>tr.child ul li{border-bottom:1px solid #efefef;padding:0.5em 0}table.dataTable>tbody>tr.child ul li:first-child{padding-top:0}table.dataTable>tbody>tr.child ul li:last-child{border-bottom:none}table.dataTable>tbody>tr.child span.dtr-title{display:inline-block;min-width:75px;font-weight:bold}


div.DTS tbody th,div.DTS tbody td{white-space:nowrap}div.DTS tbody tr.even{background-color:white}div.DTS div.DTS_Loading{z-index:1}div.DTS div.dataTables_scrollBody{background:repeating-linear-gradient(45deg, #edeeff, #edeeff 10px, #fff 10px, #fff 20px)}div.DTS div.dataTables_scrollBody table{z-index:2}div.DTS div.dataTables_paginate{display:none}



BIN
interface/css/glyphicons-halflings-regular.woff View File


BIN
interface/css/glyphicons-halflings-regular.woff2 View File


+ 635
- 0
interface/css/rspamd.css View File

@@ -0,0 +1,635 @@
/*
The MIT License (MIT)

Copyright (C) 2012-2013 Anton Simonov <untone@gmail.com>
Copyright (C) 2014-2015 Vsevolod Stakhov <vsevolod@highsecure.ru>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

a {
outline:none;
}
textarea {
font-family:Monaco,Menlo,Consolas,"Courier New",monospace;
}

.login {
width:480px;
margin-top:120px;
margin-left:-240px;
}

/* local overrides */
.disconnect {
margin:9px 0 0;
padding-bottom:5px;
font-size:11px;
color:#777;
-moz-text-shadow:0 1px 0 #fff;
-webkit-text-shadow:0 1px 0 #fff;
-o-text-shadow:0 1px 0 #fff;
-ms-text-shadow:0 1px 0 #fff;
text-shadow:0 1px 0 #fff;
}
.chart-content {
text-align:center;
}
.chart {
height:400px;
margin:0 auto;
text-align:center;
background:#fff;
}
.notice {
display:none;
}
#js .notice {
display:inline;
}

.form-sliders .control-label {
width:340px;
}
.form-sliders .controls {
margin-left:280px;
}
.slider-controls {
padding-right:8em !important;
}
.jslider {
top:13px !important;
}
input.slider {
width:4em;
margin:5px -7em 5px 0;
padding:2px 0;
text-align:center;
}
.symbols-label {
font-size:11px !important;
}

/* spinners optional */
input {
margin:0px;
padding:0px;
width:40px;
}
input[type=number] {
padding-right:25px; /* at least image width */
text-align:right;
width:40px;
}
input.number { /* should be same as type=number for IE and overriding */
padding-right:25px; /* at least image width */
text-align:right;
}
input::-webkit-outer-spin-button,
input::-webkit-inner-spin-button {
/* display:none; <- Crashes Chrome on hover */
-webkit-appearance:none;
margin:0; /* <-- Apparently some margin are still there even though it's hidden */
}
.k-numerictextbox,
.k-numeric-wrap {
display:block;
}
.k-numeric-wrap {
position:relative;
}
input.numeric {
text-align:right;
}
.k-select {
overflow:hidden;
position:absolute;
top:5px;
left:52px;
width:11px;
height:22px;
font-size:1px;
line-height:1px;
text-indent:-999px;
}
.k-link,
.k-icon {
display:block;
width:11px;
height:10px;
}
.k-icon {
cursor:pointer;
background-image:url('../img/spinner.png');
}
.k-i-arrow-n {
background-position:0 0;
}
.k-i-arrow-n:hover,
.k-i-arrow-n:focus {
background-position:-11px 0;
}
.k-i-arrow-n:active {
background-position:-22px 0;
}
.k-i-arrow-s {
margin-top:-1px;
background-position:0 -10px;
}
.k-i-arrow-s:hover,
.k-i-arrow-s:focus {
background-position:-11px -10px;
}
.k-i-arrow-s:active {
background-position:-22px -10px;
}
/* spinners default style */
/* .spin-cell {
position:relative;
}
.spinControl {
position:absolute;
height:20px;
top:6px;
left:56px;
}
.spinControl.MOZ, .spinControl.IE6, .spinControl.IE7 {
}
.spinControl button {
position:absolute;
left:0;
width:11px;
height:10px;
margin:0;
padding:0;
border:0;
background-color:none;
cursor:pointer;
background-image:url('../img/spinner.png');
}
.spinControl button.down {
bottom:1px;
}
.spinControl button.up {
}
.spinControl button.up:hover,
.spinControl button.up:focus {
background-position:-11px 0;
}
.spinControl button.up:active {
background-position:-22px 0;
}
.spinControl button.down {
background-position:0 -10px;
}
.spinControl button.down:hover,
.spinControl button.down:focus {
background-position:-11px -10px;
}
.spinControl button.down:active {
background-position:-22px -10px;
} */

/* history table */
.table-log {
table-layout:fixed;
border:0 !important;
}
.table-log .col1 {
width:130px;
}
.table-log .col2,
.table-log .col6 {
width:100%;
}
.table-log .col3,
.table-log .col4,
.table-log .col5 {
width:100px;
}
.table-log .col7,
.table-log .col8,
.table-log .col9 {
width:50px;
}
.table-log th {
padding:4px 10px;
font-size:10px;
color:#666666;
white-space:nowrap;
border-bottom:1px solid #ddd;
}
.table-log thead th,
.table-log tbody td {
text-align:left;
line-height:16px;
vertical-align:top;
}
.table-log td,
.table-log td .label {
font-family:Monaco,Menlo,Consolas,"Courier New",monospace;
font-size:11px;
}
.table-log th:first-child,
.table-log td:first-child {
border-left:0;
}
.table-log .cell-overflow {
white-space:nowrap;
overflow:hidden;
-o-text-overflow:ellipsis;
text-overflow:ellipsis;
}
.table-log .cell-overflow:focus {
margin:-3px -8px;
padding:2px 7px 3px;
position:absolute;
overflow:visible;
background:#fff;
border:1px solid #ccc;
-moz-box-shadow:0 2px 2px -2px #CCCCCC inset;
box-shadow:0 2px 2px -2px #CCCCCC inset;
cursor:text;
}
.table-log th {
background-color:#efefef;
border-left:1px solid #CDCDCD;
}
.table-log th.header {
cursor:pointer;
}
.table-log th:first-child {
border-left:0;
}
.table-log th.headerSortUp,
.table-log th.headerSortDown {
background-color:#fefefe;
background-position:100% 12px;
background-repeat:no-repeat;
}
.table-log th.headerSortUp {
background-image:url('../img/asc.png');
}
.table-log th.headerSortDown {
background-image:url('../img/desc.png');
}

.btn-upload-trigger {
position:relative;
z-index:1;
}
.upload-textarea,
.scan-textarea {
width:100% !important;
}
.upload-textarea {
height:200px;
}
.scan-textarea {
height:400px;
}
.stat-boxes {
overflow:hidden !important;
height:73px !important;
}

.row-bordered {
margin-bottom:13px;
border-bottom:1px solid #cdcdcd;
}
.symbol-description {
display:block;
margin:4px 0 0 6px;
font-size:10px;
font-weight:bold;
color:#666;
}

.list-textarea {
width:100%;
height:360px;
}
.align-right {
text-align:right !important;
}
td.maps-cell {
vertical-align:middle;
}

.map-link {
display:block;
color:#0088cc;
cursor:pointer;
}
.map-link:hover,
.map-link:focus {
color:#005580;
text-decoration:underline;
}
.spinner {
background:url('../img/spinner.gif') no-repeat -100px;
padding-left:25px;
}
.loading .spinner {
background-position:0 50%;
}

/* widget */
.widget-box {
background:none repeat scroll 0 0 #F9F9F9;
border-top:1px solid #CDCDCD;
border-left:1px solid #CDCDCD;
border-right:1px solid #CDCDCD;
clear:both;
margin-top:16px;
margin-bottom:16px;
position:relative;
}
.widget-box.widget-calendar, .widget-box.widget-chat {
overflow:hidden !important;
}
.accordion .widget-box {
margin-top:-2px;
margin-bottom:0;
border-radius:0;
}
.widget-box.widget-plain {
background:transparent;
border:none;
margin-top:0;
margin-bottom:0;
}
/* Header bar shared by widgets and modal dialogs: a light top-to-bottom
   gradient with a solid colour fallback.  The prefixed declarations keep
   the legacy `top` keyword; the final, unprefixed one uses the standard
   `to bottom` direction. */
.widget-title, .modal-header {
    background-color:#efefef;
    background-image:-webkit-gradient(linear, 0 0%, 0 100%, from(#fdfdfd), to(#eaeaea));
    background-image:-webkit-linear-gradient(top, #fdfdfd 0%, #eaeaea 100%);
    background-image:-moz-linear-gradient(top, #fdfdfd 0%, #eaeaea 100%);
    background-image:-ms-linear-gradient(top, #fdfdfd 0%, #eaeaea 100%);
    background-image:-o-linear-gradient(top, #fdfdfd 0%, #eaeaea 100%);
    background-image:linear-gradient(to bottom, #fdfdfd 0%, #eaeaea 100%);
    filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#fdfdfd', endColorstr='#eaeaea',GradientType=0 ); /* IE6-9 */
    border-bottom:1px solid #CDCDCD;
    height:36px;
}
.widget-title span.icon {
border-right:1px solid #cdcdcd;
padding:9px 10px 7px 11px;
float:left;
opacity:.7;
}
.widget-title h5 {
color:#666666;
text-shadow:0 1px 0 #ffffff;
/* float:left; */
display:block;
font-size:12px;
font-weight:bold;
padding:12px;
line-height:12px;
margin:0 0 0 36px;
}
.widget-title .buttons {
float:left;
margin:0px 0px 0 0;
}
.widget-title .label {
padding:3px 5px 2px;
float:right;
margin:9px 15px 0 0;
box-shadow:0 1px 2px rgba(0,0,0,0.3) inset, 0 1px 0 #ffffff;
}
.widget-content {
padding:12px 15px;
border-bottom:1px solid #cdcdcd;
-moz-box-shadow:0 1px 2px -1px rgba(0, 0, 0, 0.2);
box-shadow:0 1px 2px -1px rgba(0, 0, 0, 0.2);
}
.widget-inner {
padding:12px 15px;
}
.stats-plain {
width:100%;
}
.stat-boxes, .quick-actions, .quick-actions-horizontal, .stats-plain {
display:block;
list-style:none outside none;
margin:0;
}
/* Statistics tile: rounded box with a subtle top-to-bottom gradient and
   a solid colour fallback.  The unprefixed gradient uses the standard
   `to bottom` direction; the prefixed ones keep the legacy `top`. */
.stat-box {
    background-color:#F6F6F6;
    background-image:-webkit-gradient(linear, 0 0%, 0 100%, from(#F9F9F9), to(#EDEDED));
    background-image:-webkit-linear-gradient(top, #F9F9F9 0%, #EDEDED 100%);
    background-image:-moz-linear-gradient(top, #F9F9F9 0%, #EDEDED 100%);
    background-image:-ms-linear-gradient(top, #F9F9F9 0%, #EDEDED 100%);
    background-image:-o-linear-gradient(top, #F9F9F9 0%, #EDEDED 100%);
    background-image:linear-gradient(to bottom, #F9F9F9 0%, #EDEDED 100%);
    border:1px solid #d5d5d5;
    border-radius:4px 4px 4px 4px;
    box-shadow:0 1px 0 0 #FFFFFF inset, 0 1px 0 rgba(255,255,255,0.4);
    display:inline-block;
    line-height:18px;
    margin:0 10px 10px 0;
    padding:0 10px;
}
/*.stat-boxes .stat-box:first-child {
margin-right:0;
}*/
.stat-box .widget {
overflow:hidden;
margin: 0 12px;
padding: 10px 0 6px;
font-size: 10px;
font-weight: bold;
text-align: center;
text-transform:capitalize;
text-shadow: 0 1px 0 white;
color: #666;
}
.stat-box .left,
.stat-box .right {
float:left;
}
.stat-box .left {
border-right: 1px solid gainsboro;
box-shadow: 1px 0 0 0 white;
margin-right: 14px;
padding-right:18px;
font-size: 10px;
font-weight: bold;
}
.stat-box .right {
padding-left:4px;
}
.stat-box .widget span, .stat-box .widget strong {
display: block;
}
.stat-box .widget strong {
font-size: 26px;
margin-bottom: 3px;
margin-top: 6px;
}
.nomargin {
margin:0 !important;
}
.nopadding {
padding:0 !important;
}
.activity-list {
list-style:none outside none;
margin:0;
}
.activity-list li {
border-bottom:1px solid #EEEEEE;
display:block;
}
.activity-list li:last-child {
border-bottom:medium none;
}
.activity-list li a {
color:#888888;
display:block;
padding:7px 10px;
}
.activity-list li a:hover {
background-color:#FBFBFB;
}
.activity-list li a span {
color:#AAAAAA;
font-size:11px;
font-style:italic;
}
.activity-list li a i {
margin-right:10px;
opacity:0.6;
vertical-align:middle;
}
.recent-posts, .recent-comments, .recent-users {
margin:0;
padding:0;
}
.recent-posts li, .article-post li {
border-bottom:1px dotted #AEBDC8;
list-style:none outside none;
padding:10px;
}
.modal-header {
height:auto;
padding:8px 15px 5px;
}
.modal-header h3 {
font-size:12px;
text-shadow:0 1px 0 #FFFFFF;
}
.alert {
position:fixed;
z-index:1050;
top:41px;
right:0;
left:0;
padding:8px 0 8px;
margin:0 0 10px;
text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);
background-color:#fcf8e3;
border-bottom:1px solid #fbeed5;
}
.alert.alert-modal {
top:0;
}
.alert strong {
display:inline-block;
padding-left:35px;
}
.alert,
.alert h4 {
color:#c09853;
}
.alert h4 {
margin:0;
}
.alert .close {
position:relative;
top:0;
right:9px;
line-height:20px;
}
.alert-block {
position:static;
padding: 8px 14px;
border: 1px solid #fbeed5;
-webkit-border-radius: 4px;
-moz-border-radius: 4px;
border-radius: 4px;
-moz-text-shadow: 0 1px 0 rgba(255, 255, 255, 0.5);
-webkit-text-shadow: 0 1px 0 rgba(255, 255, 255, 0.5);
text-shadow: 0 1px 0 rgba(255, 255, 255, 0.5);
}
.alert-success {
color:#468847;
background:#dff0d8;
border-color:#d6e9c6;
}
.alert-success h4 {
color:#468847;
}
.alert-danger,
.alert-error {
color:#b94a48;
background:#f2dede;
border-color:#eed3d7;
}
.alert-danger h4,
.alert-error h4 {
color:#b94a48;
}
.alert-info {
color:#3a87ad;
background:#d9edf7;
border-color:#bce8f1;
}
.alert-info h4 {
color:#3a87ad;
}
.alert-block .close {
right:-1px;
}
.alert-block h4 {
margin:5px 0 10px;
}
.alert-block > p,
.alert-block > ul {
margin-bottom:0;
}
.alert-block p + p {
margin-top:10px;
}
.alert-block code {
display:block;
white-space:normal;
}

.pager li > a,
.pager li > span {
display: inline-block;
padding: 5px 14px;
background-color: #fff;
border: 1px solid #ddd;
-webkit-border-radius: 0px;
-moz-border-radius: 0px;
border-radius: 0px;
}

BIN
interface/favicon.ico View File


BIN
interface/img/asc.png View File


BIN
interface/img/desc.png View File


BIN
interface/img/spinner.gif View File


BIN
interface/img/spinner.png View File


+ 320
- 0
interface/index.html View File

@@ -0,0 +1,320 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Rspamd Web Interface</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="">
<link href="//cdnjs.cloudflare.com/ajax/libs/file-uploader/3.7.0/fineuploader.min.css" rel="stylesheet">
<link href="//maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css" rel="stylesheet">
<link rel="stylesheet" type="text/css" href="./css/datatables.min.css"/>
<link href="./css/rspamd.css" rel="stylesheet">
</head>

<body>

<nav class="navbar navbar-default" id="navBar">
<div class="container-fluid">
<div class="navbar-header">
<a class="navbar-brand" href="."><b>Rspamd</b></a>
</div>
<ul class="nav pull-right navbar-nav" style="display:none">
<li><a href="#" data-toggle="tab" id="refresh">Refresh</a></li>
<li class="spinner"><a href="#" data-toggle="tab" id="disconnect">Disconnect</a></li>
</ul>
<ul class="nav navbar-nav nav-pills" role="tablist">
<li role="presentation" class="active"><a id="status_nav" aria-controls="status" role="tab" href="#status" data-toggle="tab">Status</a></li>
<li role="presentation"><a id="configuration_nav" aria-controls="configuration" role="tab" href="#configuration" data-toggle="tab">Configuration</a></li>
<li role="presentation"><a id="learning_nav" aria-controls="learning" role="tab" href="#learning" data-toggle="tab">Learning</a></li>
<li role="presentation"><a id="scan_nav" aria-controls="scan" role="tab" href="#scan" data-toggle="tab">Scan</a></li>
<li role="presentation"><a id="history_nav" aria-controls="history" role="tab" href="#history" data-toggle="tab">History</a></li>
</ul>
</div>
</nav>

<div id="mainUI" style="display:none">

<div class="container-fluid">

<div class="tab-content">
<div class="tab-pane active" id="status">
<div class="row">
<div class="col-md-12">
<div class="widget-box widget-plain">
<ul id="statWidgets" class="stat-boxes" style="display:none">
</ul>
</div>
</div>
</div>

<div class="widget-box">
<div class="widget-title">
<span class="icon"><i class="glyphicon glyphicon-signal"></i></span>
<h5>Statistics</h5>
</div>
<div class="widget-content chart-content">
<div class="row row-chart">
<div class="chart" id="chart">
<span class="notice">Loading..</span>
<noscript>Please enable Javascript</noscript>
</div>
</div>
</div>
</div>
</div>

<div class="tab-pane" id="configuration">
<div class="widget-box">
<div class="widget-title">
<span class="icon"><i class="glyphicon glyphicon-tasks"></i></span><h5>Actions</h5>
</div>
<div class="widget-content actions-content" id="actionsBody">
</div>
</div>
<div class="widget-box">
<div class="widget-title">
<span class="icon"><i class="glyphicon glyphicon-filter"></i></span><h5>Rules</h5>
</div>
<div class="widget-content">
<button role="button" class="btn btn-primary"
data-toggle="modal"
data-source="#symbolsForm"
data-target="#modalDialog"
data-title="Symbols">Edit Rules</button>
</div>
</div>
<div class="widget-box">
<div class="widget-title">
<span class="icon"><i class="glyphicon glyphicon-list"></i></span><h5>Lists</h5>
</div>
<div class="widget-content nopadding">
<table class="table table-condensed table-hover" id="listMaps">
</table>
</div>
</div>
</div>

<div class="tab-pane" id="learning">
<div class="widget-box">
<div class="widget-title">
<span class="icon"><i class="glyphicon glyphicon-file"></i></span>
<h5>Learn RSPAMD</h5>
</div>
<div class="widget-content">
<div class="row">
<form class="col-md-6 upload-form" id="uploadSpamForm">
<h5>Upload SPAM examples:</h5>
<button id="uploadSpamTrigger" class="btn pull-right btn-upload-trigger"><i class="glyphicon glyphicon-upload"></i> Upload files</button>
<div id="uploadSpamFiles"></div>
</form>
<form class="col-md-6 upload-form" id="uploadHamForm">
<h5>Upload HAM examples:</h5>
<button id="uploadHamTrigger" class="btn pull-right btn-upload-trigger"><i class="glyphicon glyphicon-upload"></i> Upload files</button>
<div id="uploadHamFiles"></div>
</form>
</div>
</div>
<div class="widget-content">
<div class="row">
<form class="col-md-6">
<h5>Insert raw SPAM source:</h5>
<textarea class="col-md-5 upload-textarea" id="spamTextSource" value=""></textarea>
<p><button class="btn btn-default pull-right" data-upload="spam"><i class="glyphicon glyphicon-upload"></i> Upload text</button></p>
</form>
<form class="col-md-6">
<h5>Insert raw HAM source:</h5>
<textarea class="col-md-5 upload-textarea" id="hamTextSource" value=""></textarea>
<p><button class="btn btn-default pull-right" data-upload="ham"><i class="glyphicon glyphicon-upload"></i> Upload text</button></p>
</form>
</div>
<div class="row">
<form class="col-md-6 upload-form" id="uploadFuzzyForm">
<h5>Upload Fuzzy examples:</h5>
<div class="row">
<label class="pull-left">
Flag
</label>
<div class="pull-right col-md-10">
<input id="fuzzyFlagUpload" class="slider" type="slider" value="0"/>
</div>
</div>
<div class="row">
<label class="pull-left">
Weight
</label>
<div class="pull-right col-md-10">
<input id="fuzzyWeightUpload" class="slider" type="slider" value="0"/>
</div>
</div>
<button id="uploadFuzzyTrigger" class="btn pull-right btn-upload-trigger"><i class="glyphicon glyphicon-upload"></i> Upload files</button>
<div id="uploadFuzzyFiles"></div>
</form>
</div>
<div class="row">
<form class="col-md-6">
<h5>Insert raw Fuzzy storage:</h5>
<textarea class="col-md-5 upload-textarea" id="fuzzyTextSource" value=""></textarea>
<div class="row">
<label class="pull-left">
Flag
</label>
<div class="pull-right col-md-10">
<input id="fuzzyFlagText" class="slider" type="slider" value="0"/>
</div>
</div>
<div class="row">
<label class="pull-left">
Weight
</label>
<div class="pull-right col-md-10">
<input id="fuzzyWeightText" class="slider" type="slider" value="0"/>
</div>
</div>
<p><button class="btn btn-default pull-right" data-upload="fuzzy"><i class="glyphicon glyphicon-upload"></i> Upload text</button></p>
</form>
</div>
</div>
</div>
</div>

<div class="tab-pane" id="scan">
<div class="widget-box">
<div class="widget-title">
<span class="icon"><i class="glyphicon glyphicon-info-sign"></i></span>
<h5>Online scan suspected message</h5>
</div>
<div class="widget-content">
<h5>Paste and scan suspicious message</h5>
<div class="row">
<form class="col-md-12 nomargin" id="scanForm">
<textarea class="col-md-12 scan-textarea" id="scanTextSource"></textarea>
<p><button class="btn btn-default btn-primary" data-upload="scan">Scan message</button>
<button class="btn btn-default pull-right" id="scanClean">Clean form</button></p>
</form>
</div>
<div id="scanResult" style="display:none">
<h4>Scan results:</h4>
<div class="well nomargin nopadding">
<table class="table table-log table-hover" id="scanOutput">
<thead>
<tr>
<th class="col4" title="Action">Action</th>
<th class="col5" title="Score / Req.&nbsp;score">Score / Req.&nbsp;score</th>
<th class="col6" title="Symbols">Symbols</th>
</tr>
</thead>
</table>
</div>
</div>
</div>
</div>
</div>

<div class="tab-pane" id="history">

<div class="widget-box">
<div class="widget-title">
<div class="buttons pull-right">
<button class="btn btn-danger btn-sm" id="resetHistory">
<i class="glyphicon glyphicon-remove-circle"></i> Reset
</button>
<button class="btn btn-info btn-sm" id="updateHistory">
<i class="glyphicon glyphicon-refresh"></i> Update
</button>
</div>
<span class="icon"><i class="glyphicon glyphicon-eye-open"></i></span>
<h5>History</h5>
</div>
<div class="widget-content nopadding">
<table class="table table-log table-hover" id="historyLog">
<thead>
<tr>
<th class="col1" title="Time">Time</th>
<th class="col2" title="ID">ID</th>
<th class="col3" title="IP">IP</th>
<th class="col4" title="Action">Action</th>
<th class="col5" title="Score / Req.&nbsp;score">Score / Req.&nbsp;score</th>
<th class="col6" title="Symbols">Symbols</th>
<th class="col7" title="Size">Size</th>
<th class="col8" title="Scan Time (s)"><div class="cell-overflow">Scan Time (s)</div></th>
<th class="col9" title="User">User</th>
</tr>
</thead>
</table>
</div>
</div>

</div>


</div>
</div>

</div>

<!-- Common modal -->
<div id="modalDialog" class="modal fade" tabindex="-1" role="dialog" aria-labelledby="modalTitle">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h3 id="modalTitle"></h3>
</div>
<div class="modal-body" id="modalBody">
<div class="progress progress-striped active">
<div class="bar" style="width:100%;"></div>
</div>
</div>
<div class="modal-footer">
<button class="btn btn-default" data-dismiss="modal" aria-hidden="true" id="modalClose">Close</button>
<button class="btn btn-primary" id="modalSave">Save changes</button>
</div>
</div>
</div>
</div>

<!-- login modal -->
<div id="connectDialog" class="modal" tabindex="-1" role="dialog" aria-label="RSPAMD Connect">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h3>RSPAMD Connect</h3>
</div>
<div class="modal-body" id="connectBody">
<form id="connectForm">
<!--
<div class="form-group">
<label class="control-label" for="connectHost">Hostname</label>
<div class="controls">
<input class="col-md-2" type="text" id="connectHost" placeholder="Hostname" tabindex="1">
</div>
</div>
-->
<div class="form-group">
<label class="col-sm-2 control-label" for="connectPassword">Password:</label>
<div class="col-sm-10">
<input class="form-control"
type="password"
id="connectPassword"
placeholder="Password"
tabindex="1" />
</div>
</div>
<div class="form-group">
<button type="submit" id="connectButton" class="btn btn-primary" tabindex="1">Connect</button>
</div>
</form>
</div>
</div>
</div>
</div>

<div id="backDrop" class="modal-backdrop fade in" style="display:none"></div>
<script src="//code.jquery.com/jquery-2.1.4.min.js"></script>
<script src="//maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/jquery-cookie/1.4.1/jquery.cookie.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/file-uploader/3.7.0/fineuploader.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js"></script>
<script src="./js/d3pie.min.js"></script>
<script src="./js/rspamd.js"></script>
<script type="text/javascript" src="./js/datatables.min.js"></script>

</body>
</html>

+ 9
- 0
interface/js/d3pie.min.js
File diff suppressed because it is too large
View File


+ 253
- 0
interface/js/datatables.min.js View File

@@ -0,0 +1,253 @@
/*
* This combined file was created by the DataTables downloader builder:
* https://datatables.net/download
*
* To rebuild or modify this file with the latest versions of the included
* software please visit:
* https://datatables.net/download/#bs-3.3.5/jqc-1.11.3,dt-1.10.9,fh-3.0.0,r-1.0.7,sc-1.3.0
*
* Included libraries:
* jQuery compat 1.11.3, Bootstrap 3.3.5, DataTables 1.10.9, FixedHeader 3.0.0, Responsive 1.0.7, Scroller 1.3.0
*/

/*!
DataTables 1.10.9
©2008-2015 SpryMedia Ltd - datatables.net/license
*/
(function(Fa,T,k){var S=function(h){function X(a){var b,c,d={};h.each(a,function(e){if((b=e.match(/^([^A-Z]+?)([A-Z])/))&&-1!=="a aa ai ao as b fn i m o s ".indexOf(b[1]+" "))c=e.replace(b[0],b[2].toLowerCase()),d[c]=e,"o"===b[1]&&X(a[e])});a._hungarianMap=d}function I(a,b,c){a._hungarianMap||X(a);var d;h.each(b,function(e){d=a._hungarianMap[e];if(d!==k&&(c||b[d]===k))"o"===d.charAt(0)?(b[d]||(b[d]={}),h.extend(!0,b[d],b[e]),I(a[d],b[d],c)):b[d]=b[e]})}function S(a){var b=m.defaults.oLanguage,c=a.sZeroRecords;
!a.sEmptyTable&&(c&&"No data available in table"===b.sEmptyTable)&&F(a,a,"sZeroRecords","sEmptyTable");!a.sLoadingRecords&&(c&&"Loading..."===b.sLoadingRecords)&&F(a,a,"sZeroRecords","sLoadingRecords");a.sInfoThousands&&(a.sThousands=a.sInfoThousands);(a=a.sDecimal)&&cb(a)}function db(a){A(a,"ordering","bSort");A(a,"orderMulti","bSortMulti");A(a,"orderClasses","bSortClasses");A(a,"orderCellsTop","bSortCellsTop");A(a,"order","aaSorting");A(a,"orderFixed","aaSortingFixed");A(a,"paging","bPaginate");
A(a,"pagingType","sPaginationType");A(a,"pageLength","iDisplayLength");A(a,"searching","bFilter");"boolean"===typeof a.sScrollX&&(a.sScrollX=a.sScrollX?"100%":"");if(a=a.aoSearchCols)for(var b=0,c=a.length;b<c;b++)a[b]&&I(m.models.oSearch,a[b])}function eb(a){A(a,"orderable","bSortable");A(a,"orderData","aDataSort");A(a,"orderSequence","asSorting");A(a,"orderDataType","sortDataType");var b=a.aDataSort;b&&!h.isArray(b)&&(a.aDataSort=[b])}function fb(a){if(!m.__browser){var b={};m.__browser=b;var c=
h("<div/>").css({position:"fixed",top:0,left:0,height:1,width:1,overflow:"hidden"}).append(h("<div/>").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(h("<div/>").css({width:"100%",height:10}))).appendTo("body"),d=c.children(),e=d.children();b.barWidth=d[0].offsetWidth-d[0].clientWidth;b.bScrollOversize=100===e[0].offsetWidth&&100!==d[0].clientWidth;b.bScrollbarLeft=1!==Math.round(e.offset().left);b.bBounding=c[0].getBoundingClientRect().width?!0:!1;c.remove()}h.extend(a.oBrowser,
m.__browser);a.oScroll.iBarWidth=m.__browser.barWidth}function gb(a,b,c,d,e,f){var g,i=!1;c!==k&&(g=c,i=!0);for(;d!==e;)a.hasOwnProperty(d)&&(g=i?b(g,a[d],d,a):a[d],i=!0,d+=f);return g}function Ga(a,b){var c=m.defaults.column,d=a.aoColumns.length,c=h.extend({},m.models.oColumn,c,{nTh:b?b:T.createElement("th"),sTitle:c.sTitle?c.sTitle:b?b.innerHTML:"",aDataSort:c.aDataSort?c.aDataSort:[d],mData:c.mData?c.mData:d,idx:d});a.aoColumns.push(c);c=a.aoPreSearchCols;c[d]=h.extend({},m.models.oSearch,c[d]);
la(a,d,h(b).data())}function la(a,b,c){var b=a.aoColumns[b],d=a.oClasses,e=h(b.nTh);if(!b.sWidthOrig){b.sWidthOrig=e.attr("width")||null;var f=(e.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/);f&&(b.sWidthOrig=f[1])}c!==k&&null!==c&&(eb(c),I(m.defaults.column,c),c.mDataProp!==k&&!c.mData&&(c.mData=c.mDataProp),c.sType&&(b._sManualType=c.sType),c.className&&!c.sClass&&(c.sClass=c.className),h.extend(b,c),F(b,c,"sWidth","sWidthOrig"),c.iDataSort!==k&&(b.aDataSort=[c.iDataSort]),F(b,c,"aDataSort"));
var g=b.mData,i=P(g),j=b.mRender?P(b.mRender):null,c=function(a){return"string"===typeof a&&-1!==a.indexOf("@")};b._bAttrSrc=h.isPlainObject(g)&&(c(g.sort)||c(g.type)||c(g.filter));b.fnGetData=function(a,b,c){var d=i(a,b,k,c);return j&&b?j(d,b,a,c):d};b.fnSetData=function(a,b,c){return Q(g)(a,b,c)};"number"!==typeof g&&(a._rowReadObject=!0);a.oFeatures.bSort||(b.bSortable=!1,e.addClass(d.sSortableNone));a=-1!==h.inArray("asc",b.asSorting);c=-1!==h.inArray("desc",b.asSorting);!b.bSortable||!a&&!c?
(b.sSortingClass=d.sSortableNone,b.sSortingClassJUI=""):a&&!c?(b.sSortingClass=d.sSortableAsc,b.sSortingClassJUI=d.sSortJUIAscAllowed):!a&&c?(b.sSortingClass=d.sSortableDesc,b.sSortingClassJUI=d.sSortJUIDescAllowed):(b.sSortingClass=d.sSortable,b.sSortingClassJUI=d.sSortJUI)}function Y(a){if(!1!==a.oFeatures.bAutoWidth){var b=a.aoColumns;Ha(a);for(var c=0,d=b.length;c<d;c++)b[c].nTh.style.width=b[c].sWidth}b=a.oScroll;(""!==b.sY||""!==b.sX)&&Z(a);w(a,null,"column-sizing",[a])}function $(a,b){var c=
aa(a,"bVisible");return"number"===typeof c[b]?c[b]:null}function ba(a,b){var c=aa(a,"bVisible"),c=h.inArray(b,c);return-1!==c?c:null}function ca(a){return aa(a,"bVisible").length}function aa(a,b){var c=[];h.map(a.aoColumns,function(a,e){a[b]&&c.push(e)});return c}function Ia(a){var b=a.aoColumns,c=a.aoData,d=m.ext.type.detect,e,f,g,i,j,h,l,r,q;e=0;for(f=b.length;e<f;e++)if(l=b[e],q=[],!l.sType&&l._sManualType)l.sType=l._sManualType;else if(!l.sType){g=0;for(i=d.length;g<i;g++){j=0;for(h=c.length;j<
h;j++){q[j]===k&&(q[j]=B(a,j,e,"type"));r=d[g](q[j],a);if(!r&&g!==d.length-1)break;if("html"===r)break}if(r){l.sType=r;break}}l.sType||(l.sType="string")}}function hb(a,b,c,d){var e,f,g,i,j,n,l=a.aoColumns;if(b)for(e=b.length-1;0<=e;e--){n=b[e];var r=n.targets!==k?n.targets:n.aTargets;h.isArray(r)||(r=[r]);f=0;for(g=r.length;f<g;f++)if("number"===typeof r[f]&&0<=r[f]){for(;l.length<=r[f];)Ga(a);d(r[f],n)}else if("number"===typeof r[f]&&0>r[f])d(l.length+r[f],n);else if("string"===typeof r[f]){i=0;
for(j=l.length;i<j;i++)("_all"==r[f]||h(l[i].nTh).hasClass(r[f]))&&d(i,n)}}if(c){e=0;for(a=c.length;e<a;e++)d(e,c[e])}}function L(a,b,c,d){var e=a.aoData.length,f=h.extend(!0,{},m.models.oRow,{src:c?"dom":"data",idx:e});f._aData=b;a.aoData.push(f);for(var g=a.aoColumns,i=0,j=g.length;i<j;i++)g[i].sType=null;a.aiDisplayMaster.push(e);b=a.rowIdFn(b);b!==k&&(a.aIds[b]=f);(c||!a.oFeatures.bDeferRender)&&Ja(a,e,c,d);return e}function ma(a,b){var c;b instanceof h||(b=h(b));return b.map(function(b,e){c=
Ka(a,e);return L(a,c.data,e,c.cells)})}function B(a,b,c,d){var e=a.iDraw,f=a.aoColumns[c],g=a.aoData[b]._aData,i=f.sDefaultContent,c=f.fnGetData(g,d,{settings:a,row:b,col:c});if(c===k)return a.iDrawError!=e&&null===i&&(J(a,0,"Requested unknown parameter "+("function"==typeof f.mData?"{function}":"'"+f.mData+"'")+" for row "+b,4),a.iDrawError=e),i;if((c===g||null===c)&&null!==i)c=i;else if("function"===typeof c)return c.call(g);return null===c&&"display"==d?"":c}function ib(a,b,c,d){a.aoColumns[c].fnSetData(a.aoData[b]._aData,
d,{settings:a,row:b,col:c})}function La(a){return h.map(a.match(/(\\.|[^\.])+/g)||[""],function(a){return a.replace(/\\./g,".")})}function P(a){if(h.isPlainObject(a)){var b={};h.each(a,function(a,c){c&&(b[a]=P(c))});return function(a,c,f,g){var i=b[c]||b._;return i!==k?i(a,c,f,g):a}}if(null===a)return function(a){return a};if("function"===typeof a)return function(b,c,f,g){return a(b,c,f,g)};if("string"===typeof a&&(-1!==a.indexOf(".")||-1!==a.indexOf("[")||-1!==a.indexOf("("))){var c=function(a,b,
f){var g,i;if(""!==f){i=La(f);for(var j=0,n=i.length;j<n;j++){f=i[j].match(da);g=i[j].match(U);if(f){i[j]=i[j].replace(da,"");""!==i[j]&&(a=a[i[j]]);g=[];i.splice(0,j+1);i=i.join(".");if(h.isArray(a)){j=0;for(n=a.length;j<n;j++)g.push(c(a[j],b,i))}a=f[0].substring(1,f[0].length-1);a=""===a?g:g.join(a);break}else if(g){i[j]=i[j].replace(U,"");a=a[i[j]]();continue}if(null===a||a[i[j]]===k)return k;a=a[i[j]]}}return a};return function(b,e){return c(b,e,a)}}return function(b){return b[a]}}function Q(a){if(h.isPlainObject(a))return Q(a._);
if(null===a)return function(){};if("function"===typeof a)return function(b,d,e){a(b,"set",d,e)};if("string"===typeof a&&(-1!==a.indexOf(".")||-1!==a.indexOf("[")||-1!==a.indexOf("("))){var b=function(a,d,e){var e=La(e),f;f=e[e.length-1];for(var g,i,j=0,n=e.length-1;j<n;j++){g=e[j].match(da);i=e[j].match(U);if(g){e[j]=e[j].replace(da,"");a[e[j]]=[];f=e.slice();f.splice(0,j+1);g=f.join(".");if(h.isArray(d)){i=0;for(n=d.length;i<n;i++)f={},b(f,d[i],g),a[e[j]].push(f)}else a[e[j]]=d;return}i&&(e[j]=e[j].replace(U,
""),a=a[e[j]](d));if(null===a[e[j]]||a[e[j]]===k)a[e[j]]={};a=a[e[j]]}if(f.match(U))a[f.replace(U,"")](d);else a[f.replace(da,"")]=d};return function(c,d){return b(c,d,a)}}return function(b,d){b[a]=d}}function Ma(a){return D(a.aoData,"_aData")}function na(a){a.aoData.length=0;a.aiDisplayMaster.length=0;a.aiDisplay.length=0;a.aIds={}}function oa(a,b,c){for(var d=-1,e=0,f=a.length;e<f;e++)a[e]==b?d=e:a[e]>b&&a[e]--; -1!=d&&c===k&&a.splice(d,1)}function ea(a,b,c,d){var e=a.aoData[b],f,g=function(c,d){for(;c.childNodes.length;)c.removeChild(c.firstChild);
c.innerHTML=B(a,b,d,"display")};if("dom"===c||(!c||"auto"===c)&&"dom"===e.src)e._aData=Ka(a,e,d,d===k?k:e._aData).data;else{var i=e.anCells;if(i)if(d!==k)g(i[d],d);else{c=0;for(f=i.length;c<f;c++)g(i[c],c)}}e._aSortData=null;e._aFilterData=null;g=a.aoColumns;if(d!==k)g[d].sType=null;else{c=0;for(f=g.length;c<f;c++)g[c].sType=null;Na(a,e)}}function Ka(a,b,c,d){var e=[],f=b.firstChild,g,i,j=0,n,l=a.aoColumns,r=a._rowReadObject,d=d!==k?d:r?{}:[],q=function(a,b){if("string"===typeof a){var c=a.indexOf("@");
-1!==c&&(c=a.substring(c+1),Q(a)(d,b.getAttribute(c)))}},jb=function(a){if(c===k||c===j)i=l[j],n=h.trim(a.innerHTML),i&&i._bAttrSrc?(Q(i.mData._)(d,n),q(i.mData.sort,a),q(i.mData.type,a),q(i.mData.filter,a)):r?(i._setter||(i._setter=Q(i.mData)),i._setter(d,n)):d[j]=n;j++};if(f)for(;f;){g=f.nodeName.toUpperCase();if("TD"==g||"TH"==g)jb(f),e.push(f);f=f.nextSibling}else{e=b.anCells;g=0;for(var o=e.length;g<o;g++)jb(e[g])}if(b=f?b:b.nTr)(b=b.getAttribute("id"))&&Q(a.rowId)(d,b);return{data:d,cells:e}}
function Ja(a,b,c,d){var e=a.aoData[b],f=e._aData,g=[],i,j,h,l,r;if(null===e.nTr){i=c||T.createElement("tr");e.nTr=i;e.anCells=g;i._DT_RowIndex=b;Na(a,e);l=0;for(r=a.aoColumns.length;l<r;l++){h=a.aoColumns[l];j=c?d[l]:T.createElement(h.sCellType);g.push(j);if(!c||h.mRender||h.mData!==l)j.innerHTML=B(a,b,l,"display");h.sClass&&(j.className+=" "+h.sClass);h.bVisible&&!c?i.appendChild(j):!h.bVisible&&c&&j.parentNode.removeChild(j);h.fnCreatedCell&&h.fnCreatedCell.call(a.oInstance,j,B(a,b,l),f,b,l)}w(a,
"aoRowCreatedCallback",null,[i,f,b])}e.nTr.setAttribute("role","row")}function Na(a,b){var c=b.nTr,d=b._aData;if(c){var e=a.rowIdFn(d);e&&(c.id=e);d.DT_RowClass&&(e=d.DT_RowClass.split(" "),b.__rowc=b.__rowc?pa(b.__rowc.concat(e)):e,h(c).removeClass(b.__rowc.join(" ")).addClass(d.DT_RowClass));d.DT_RowAttr&&h(c).attr(d.DT_RowAttr);d.DT_RowData&&h(c).data(d.DT_RowData)}}function kb(a){var b,c,d,e,f,g=a.nTHead,i=a.nTFoot,j=0===h("th, td",g).length,n=a.oClasses,l=a.aoColumns;j&&(e=h("<tr/>").appendTo(g));
b=0;for(c=l.length;b<c;b++)f=l[b],d=h(f.nTh).addClass(f.sClass),j&&d.appendTo(e),a.oFeatures.bSort&&(d.addClass(f.sSortingClass),!1!==f.bSortable&&(d.attr("tabindex",a.iTabIndex).attr("aria-controls",a.sTableId),Oa(a,f.nTh,b))),f.sTitle!=d[0].innerHTML&&d.html(f.sTitle),Pa(a,"header")(a,d,f,n);j&&fa(a.aoHeader,g);h(g).find(">tr").attr("role","row");h(g).find(">tr>th, >tr>td").addClass(n.sHeaderTH);h(i).find(">tr>th, >tr>td").addClass(n.sFooterTH);if(null!==i){a=a.aoFooter[0];b=0;for(c=a.length;b<
c;b++)f=l[b],f.nTf=a[b].cell,f.sClass&&h(f.nTf).addClass(f.sClass)}}function ga(a,b,c){var d,e,f,g=[],i=[],j=a.aoColumns.length,n;if(b){c===k&&(c=!1);d=0;for(e=b.length;d<e;d++){g[d]=b[d].slice();g[d].nTr=b[d].nTr;for(f=j-1;0<=f;f--)!a.aoColumns[f].bVisible&&!c&&g[d].splice(f,1);i.push([])}d=0;for(e=g.length;d<e;d++){if(a=g[d].nTr)for(;f=a.firstChild;)a.removeChild(f);f=0;for(b=g[d].length;f<b;f++)if(n=j=1,i[d][f]===k){a.appendChild(g[d][f].cell);for(i[d][f]=1;g[d+j]!==k&&g[d][f].cell==g[d+j][f].cell;)i[d+
j][f]=1,j++;for(;g[d][f+n]!==k&&g[d][f].cell==g[d][f+n].cell;){for(c=0;c<j;c++)i[d+c][f+n]=1;n++}h(g[d][f].cell).attr("rowspan",j).attr("colspan",n)}}}}function M(a){var b=w(a,"aoPreDrawCallback","preDraw",[a]);if(-1!==h.inArray(!1,b))C(a,!1);else{var b=[],c=0,d=a.asStripeClasses,e=d.length,f=a.oLanguage,g=a.iInitDisplayStart,i="ssp"==y(a),j=a.aiDisplay;a.bDrawing=!0;g!==k&&-1!==g&&(a._iDisplayStart=i?g:g>=a.fnRecordsDisplay()?0:g,a.iInitDisplayStart=-1);var g=a._iDisplayStart,n=a.fnDisplayEnd();
if(a.bDeferLoading)a.bDeferLoading=!1,a.iDraw++,C(a,!1);else if(i){if(!a.bDestroying&&!lb(a))return}else a.iDraw++;if(0!==j.length){f=i?a.aoData.length:n;for(i=i?0:g;i<f;i++){var l=j[i],r=a.aoData[l];null===r.nTr&&Ja(a,l);l=r.nTr;if(0!==e){var q=d[c%e];r._sRowStripe!=q&&(h(l).removeClass(r._sRowStripe).addClass(q),r._sRowStripe=q)}w(a,"aoRowCallback",null,[l,r._aData,c,i]);b.push(l);c++}}else c=f.sZeroRecords,1==a.iDraw&&"ajax"==y(a)?c=f.sLoadingRecords:f.sEmptyTable&&0===a.fnRecordsTotal()&&(c=f.sEmptyTable),
b[0]=h("<tr/>",{"class":e?d[0]:""}).append(h("<td />",{valign:"top",colSpan:ca(a),"class":a.oClasses.sRowEmpty}).html(c))[0];w(a,"aoHeaderCallback","header",[h(a.nTHead).children("tr")[0],Ma(a),g,n,j]);w(a,"aoFooterCallback","footer",[h(a.nTFoot).children("tr")[0],Ma(a),g,n,j]);d=h(a.nTBody);d.children().detach();d.append(h(b));w(a,"aoDrawCallback","draw",[a]);a.bSorted=!1;a.bFiltered=!1;a.bDrawing=!1}}function R(a,b){var c=a.oFeatures,d=c.bFilter;c.bSort&&mb(a);d?ha(a,a.oPreviousSearch):a.aiDisplay=
a.aiDisplayMaster.slice();!0!==b&&(a._iDisplayStart=0);a._drawHold=b;M(a);a._drawHold=!1}function nb(a){var b=a.oClasses,c=h(a.nTable),c=h("<div/>").insertBefore(c),d=a.oFeatures,e=h("<div/>",{id:a.sTableId+"_wrapper","class":b.sWrapper+(a.nTFoot?"":" "+b.sNoFooter)});a.nHolding=c[0];a.nTableWrapper=e[0];a.nTableReinsertBefore=a.nTable.nextSibling;for(var f=a.sDom.split(""),g,i,j,n,l,r,q=0;q<f.length;q++){g=null;i=f[q];if("<"==i){j=h("<div/>")[0];n=f[q+1];if("'"==n||'"'==n){l="";for(r=2;f[q+r]!=n;)l+=
f[q+r],r++;"H"==l?l=b.sJUIHeader:"F"==l&&(l=b.sJUIFooter);-1!=l.indexOf(".")?(n=l.split("."),j.id=n[0].substr(1,n[0].length-1),j.className=n[1]):"#"==l.charAt(0)?j.id=l.substr(1,l.length-1):j.className=l;q+=r}e.append(j);e=h(j)}else if(">"==i)e=e.parent();else if("l"==i&&d.bPaginate&&d.bLengthChange)g=ob(a);else if("f"==i&&d.bFilter)g=pb(a);else if("r"==i&&d.bProcessing)g=qb(a);else if("t"==i)g=rb(a);else if("i"==i&&d.bInfo)g=sb(a);else if("p"==i&&d.bPaginate)g=tb(a);else if(0!==m.ext.feature.length){j=
m.ext.feature;r=0;for(n=j.length;r<n;r++)if(i==j[r].cFeature){g=j[r].fnInit(a);break}}g&&(j=a.aanFeatures,j[i]||(j[i]=[]),j[i].push(g),e.append(g))}c.replaceWith(e);a.nHolding=null}function fa(a,b){var c=h(b).children("tr"),d,e,f,g,i,j,n,l,r,q;a.splice(0,a.length);f=0;for(j=c.length;f<j;f++)a.push([]);f=0;for(j=c.length;f<j;f++){d=c[f];for(e=d.firstChild;e;){if("TD"==e.nodeName.toUpperCase()||"TH"==e.nodeName.toUpperCase()){l=1*e.getAttribute("colspan");r=1*e.getAttribute("rowspan");l=!l||0===l||
1===l?1:l;r=!r||0===r||1===r?1:r;g=0;for(i=a[f];i[g];)g++;n=g;q=1===l?!0:!1;for(i=0;i<l;i++)for(g=0;g<r;g++)a[f+g][n+i]={cell:e,unique:q},a[f+g].nTr=d}e=e.nextSibling}}}function qa(a,b,c){var d=[];c||(c=a.aoHeader,b&&(c=[],fa(c,b)));for(var b=0,e=c.length;b<e;b++)for(var f=0,g=c[b].length;f<g;f++)if(c[b][f].unique&&(!d[f]||!a.bSortCellsTop))d[f]=c[b][f].cell;return d}function ra(a,b,c){w(a,"aoServerParams","serverParams",[b]);if(b&&h.isArray(b)){var d={},e=/(.*?)\[\]$/;h.each(b,function(a,b){var c=
b.name.match(e);c?(c=c[0],d[c]||(d[c]=[]),d[c].push(b.value)):d[b.name]=b.value});b=d}var f,g=a.ajax,i=a.oInstance,j=function(b){w(a,null,"xhr",[a,b,a.jqXHR]);c(b)};if(h.isPlainObject(g)&&g.data){f=g.data;var n=h.isFunction(f)?f(b,a):f,b=h.isFunction(f)&&n?n:h.extend(!0,b,n);delete g.data}n={data:b,success:function(b){var c=b.error||b.sError;c&&J(a,0,c);a.json=b;j(b)},dataType:"json",cache:!1,type:a.sServerMethod,error:function(b,c){var d=w(a,null,"xhr",[a,null,a.jqXHR]);-1===h.inArray(!0,d)&&("parsererror"==
c?J(a,0,"Invalid JSON response",1):4===b.readyState&&J(a,0,"Ajax error",7));C(a,!1)}};a.oAjaxData=b;w(a,null,"preXhr",[a,b]);a.fnServerData?a.fnServerData.call(i,a.sAjaxSource,h.map(b,function(a,b){return{name:b,value:a}}),j,a):a.sAjaxSource||"string"===typeof g?a.jqXHR=h.ajax(h.extend(n,{url:g||a.sAjaxSource})):h.isFunction(g)?a.jqXHR=g.call(i,b,j,a):(a.jqXHR=h.ajax(h.extend(n,g)),g.data=f)}function lb(a){return a.bAjaxDataGet?(a.iDraw++,C(a,!0),ra(a,ub(a),function(b){vb(a,b)}),!1):!0}function ub(a){var b=
a.aoColumns,c=b.length,d=a.oFeatures,e=a.oPreviousSearch,f=a.aoPreSearchCols,g,i=[],j,n,l,r=V(a);g=a._iDisplayStart;j=!1!==d.bPaginate?a._iDisplayLength:-1;var q=function(a,b){i.push({name:a,value:b})};q("sEcho",a.iDraw);q("iColumns",c);q("sColumns",D(b,"sName").join(","));q("iDisplayStart",g);q("iDisplayLength",j);var k={draw:a.iDraw,columns:[],order:[],start:g,length:j,search:{value:e.sSearch,regex:e.bRegex}};for(g=0;g<c;g++)n=b[g],l=f[g],j="function"==typeof n.mData?"function":n.mData,k.columns.push({data:j,
name:n.sName,searchable:n.bSearchable,orderable:n.bSortable,search:{value:l.sSearch,regex:l.bRegex}}),q("mDataProp_"+g,j),d.bFilter&&(q("sSearch_"+g,l.sSearch),q("bRegex_"+g,l.bRegex),q("bSearchable_"+g,n.bSearchable)),d.bSort&&q("bSortable_"+g,n.bSortable);d.bFilter&&(q("sSearch",e.sSearch),q("bRegex",e.bRegex));d.bSort&&(h.each(r,function(a,b){k.order.push({column:b.col,dir:b.dir});q("iSortCol_"+a,b.col);q("sSortDir_"+a,b.dir)}),q("iSortingCols",r.length));b=m.ext.legacy.ajax;return null===b?a.sAjaxSource?
i:k:b?i:k}function vb(a,b){var c=sa(a,b),d=b.sEcho!==k?b.sEcho:b.draw,e=b.iTotalRecords!==k?b.iTotalRecords:b.recordsTotal,f=b.iTotalDisplayRecords!==k?b.iTotalDisplayRecords:b.recordsFiltered;if(d){if(1*d<a.iDraw)return;a.iDraw=1*d}na(a);a._iRecordsTotal=parseInt(e,10);a._iRecordsDisplay=parseInt(f,10);d=0;for(e=c.length;d<e;d++)L(a,c[d]);a.aiDisplay=a.aiDisplayMaster.slice();a.bAjaxDataGet=!1;M(a);a._bInitComplete||ta(a,b);a.bAjaxDataGet=!0;C(a,!1)}function sa(a,b){var c=h.isPlainObject(a.ajax)&&
a.ajax.dataSrc!==k?a.ajax.dataSrc:a.sAjaxDataProp;return"data"===c?b.aaData||b[c]:""!==c?P(c)(b):b}function pb(a){var b=a.oClasses,c=a.sTableId,d=a.oLanguage,e=a.oPreviousSearch,f=a.aanFeatures,g='<input type="search" class="'+b.sFilterInput+'"/>',i=d.sSearch,i=i.match(/_INPUT_/)?i.replace("_INPUT_",g):i+g,b=h("<div/>",{id:!f.f?c+"_filter":null,"class":b.sFilter}).append(h("<label/>").append(i)),f=function(){var b=!this.value?"":this.value;b!=e.sSearch&&(ha(a,{sSearch:b,bRegex:e.bRegex,bSmart:e.bSmart,
bCaseInsensitive:e.bCaseInsensitive}),a._iDisplayStart=0,M(a))},g=null!==a.searchDelay?a.searchDelay:"ssp"===y(a)?400:0,j=h("input",b).val(e.sSearch).attr("placeholder",d.sSearchPlaceholder).bind("keyup.DT search.DT input.DT paste.DT cut.DT",g?ua(f,g):f).bind("keypress.DT",function(a){if(13==a.keyCode)return!1}).attr("aria-controls",c);h(a.nTable).on("search.dt.DT",function(b,c){if(a===c)try{j[0]!==T.activeElement&&j.val(e.sSearch)}catch(d){}});return b[0]}function ha(a,b,c){var d=a.oPreviousSearch,
e=a.aoPreSearchCols,f=function(a){d.sSearch=a.sSearch;d.bRegex=a.bRegex;d.bSmart=a.bSmart;d.bCaseInsensitive=a.bCaseInsensitive};Ia(a);if("ssp"!=y(a)){wb(a,b.sSearch,c,b.bEscapeRegex!==k?!b.bEscapeRegex:b.bRegex,b.bSmart,b.bCaseInsensitive);f(b);for(b=0;b<e.length;b++)xb(a,e[b].sSearch,b,e[b].bEscapeRegex!==k?!e[b].bEscapeRegex:e[b].bRegex,e[b].bSmart,e[b].bCaseInsensitive);yb(a)}else f(b);a.bFiltered=!0;w(a,null,"search",[a])}function yb(a){for(var b=m.ext.search,c=a.aiDisplay,d,e,f=0,g=b.length;f<
g;f++){for(var i=[],j=0,n=c.length;j<n;j++)e=c[j],d=a.aoData[e],b[f](a,d._aFilterData,e,d._aData,j)&&i.push(e);c.length=0;h.merge(c,i)}}function xb(a,b,c,d,e,f){if(""!==b)for(var g=a.aiDisplay,d=Qa(b,d,e,f),e=g.length-1;0<=e;e--)b=a.aoData[g[e]]._aFilterData[c],d.test(b)||g.splice(e,1)}function wb(a,b,c,d,e,f){var d=Qa(b,d,e,f),e=a.oPreviousSearch.sSearch,f=a.aiDisplayMaster,g;0!==m.ext.search.length&&(c=!0);g=zb(a);if(0>=b.length)a.aiDisplay=f.slice();else{if(g||c||e.length>b.length||0!==b.indexOf(e)||
a.bSorted)a.aiDisplay=f.slice();b=a.aiDisplay;for(c=b.length-1;0<=c;c--)d.test(a.aoData[b[c]]._sFilterRow)||b.splice(c,1)}}function Qa(a,b,c,d){a=b?a:va(a);c&&(a="^(?=.*?"+h.map(a.match(/"[^"]+"|[^ ]+/g)||[""],function(a){if('"'===a.charAt(0))var b=a.match(/^"(.*)"$/),a=b?b[1]:a;return a.replace('"',"")}).join(")(?=.*?")+").*$");return RegExp(a,d?"i":"")}function va(a){return a.replace(Yb,"\\$1")}function zb(a){var b=a.aoColumns,c,d,e,f,g,i,j,h,l=m.ext.type.search;c=!1;d=0;for(f=a.aoData.length;d<
f;d++)if(h=a.aoData[d],!h._aFilterData){i=[];e=0;for(g=b.length;e<g;e++)c=b[e],c.bSearchable?(j=B(a,d,e,"filter"),l[c.sType]&&(j=l[c.sType](j)),null===j&&(j=""),"string"!==typeof j&&j.toString&&(j=j.toString())):j="",j.indexOf&&-1!==j.indexOf("&")&&(wa.innerHTML=j,j=Zb?wa.textContent:wa.innerText),j.replace&&(j=j.replace(/[\r\n]/g,"")),i.push(j);h._aFilterData=i;h._sFilterRow=i.join(" ");c=!0}return c}function Ab(a){return{search:a.sSearch,smart:a.bSmart,regex:a.bRegex,caseInsensitive:a.bCaseInsensitive}}
function Bb(a){return{sSearch:a.search,bSmart:a.smart,bRegex:a.regex,bCaseInsensitive:a.caseInsensitive}}function sb(a){var b=a.sTableId,c=a.aanFeatures.i,d=h("<div/>",{"class":a.oClasses.sInfo,id:!c?b+"_info":null});c||(a.aoDrawCallback.push({fn:Cb,sName:"information"}),d.attr("role","status").attr("aria-live","polite"),h(a.nTable).attr("aria-describedby",b+"_info"));return d[0]}function Cb(a){var b=a.aanFeatures.i;if(0!==b.length){var c=a.oLanguage,d=a._iDisplayStart+1,e=a.fnDisplayEnd(),f=a.fnRecordsTotal(),
g=a.fnRecordsDisplay(),i=g?c.sInfo:c.sInfoEmpty;g!==f&&(i+=" "+c.sInfoFiltered);i+=c.sInfoPostFix;i=Db(a,i);c=c.fnInfoCallback;null!==c&&(i=c.call(a.oInstance,a,d,e,f,g,i));h(b).html(i)}}function Db(a,b){var c=a.fnFormatNumber,d=a._iDisplayStart+1,e=a._iDisplayLength,f=a.fnRecordsDisplay(),g=-1===e;return b.replace(/_START_/g,c.call(a,d)).replace(/_END_/g,c.call(a,a.fnDisplayEnd())).replace(/_MAX_/g,c.call(a,a.fnRecordsTotal())).replace(/_TOTAL_/g,c.call(a,f)).replace(/_PAGE_/g,c.call(a,g?1:Math.ceil(d/
e))).replace(/_PAGES_/g,c.call(a,g?1:Math.ceil(f/e)))}function ia(a){var b,c,d=a.iInitDisplayStart,e=a.aoColumns,f;c=a.oFeatures;var g=a.bDeferLoading;if(a.bInitialised){nb(a);kb(a);ga(a,a.aoHeader);ga(a,a.aoFooter);C(a,!0);c.bAutoWidth&&Ha(a);b=0;for(c=e.length;b<c;b++)f=e[b],f.sWidth&&(f.nTh.style.width=u(f.sWidth));w(a,null,"preInit",[a]);R(a);e=y(a);if("ssp"!=e||g)"ajax"==e?ra(a,[],function(c){var f=sa(a,c);for(b=0;b<f.length;b++)L(a,f[b]);a.iInitDisplayStart=d;R(a);C(a,!1);ta(a,c)},a):(C(a,!1),
ta(a))}else setTimeout(function(){ia(a)},200)}function ta(a,b){a._bInitComplete=!0;(b||a.oInit.aaData)&&Y(a);w(a,"aoInitComplete","init",[a,b])}function Ra(a,b){var c=parseInt(b,10);a._iDisplayLength=c;Sa(a);w(a,null,"length",[a,c])}function ob(a){for(var b=a.oClasses,c=a.sTableId,d=a.aLengthMenu,e=h.isArray(d[0]),f=e?d[0]:d,d=e?d[1]:d,e=h("<select/>",{name:c+"_length","aria-controls":c,"class":b.sLengthSelect}),g=0,i=f.length;g<i;g++)e[0][g]=new Option(d[g],f[g]);var j=h("<div><label/></div>").addClass(b.sLength);
a.aanFeatures.l||(j[0].id=c+"_length");j.children().append(a.oLanguage.sLengthMenu.replace("_MENU_",e[0].outerHTML));h("select",j).val(a._iDisplayLength).bind("change.DT",function(){Ra(a,h(this).val());M(a)});h(a.nTable).bind("length.dt.DT",function(b,c,d){a===c&&h("select",j).val(d)});return j[0]}function tb(a){var b=a.sPaginationType,c=m.ext.pager[b],d="function"===typeof c,e=function(a){M(a)},b=h("<div/>").addClass(a.oClasses.sPaging+b)[0],f=a.aanFeatures;d||c.fnInit(a,b,e);f.p||(b.id=a.sTableId+
"_paginate",a.aoDrawCallback.push({fn:function(a){if(d){var b=a._iDisplayStart,j=a._iDisplayLength,h=a.fnRecordsDisplay(),l=-1===j,b=l?0:Math.ceil(b/j),j=l?1:Math.ceil(h/j),h=c(b,j),k,l=0;for(k=f.p.length;l<k;l++)Pa(a,"pageButton")(a,f.p[l],l,h,b,j)}else c.fnUpdate(a,e)},sName:"pagination"}));return b}function Ta(a,b,c){var d=a._iDisplayStart,e=a._iDisplayLength,f=a.fnRecordsDisplay();0===f||-1===e?d=0:"number"===typeof b?(d=b*e,d>f&&(d=0)):"first"==b?d=0:"previous"==b?(d=0<=e?d-e:0,0>d&&(d=0)):"next"==
b?d+e<f&&(d+=e):"last"==b?d=Math.floor((f-1)/e)*e:J(a,0,"Unknown paging action: "+b,5);b=a._iDisplayStart!==d;a._iDisplayStart=d;b&&(w(a,null,"page",[a]),c&&M(a));return b}function qb(a){return h("<div/>",{id:!a.aanFeatures.r?a.sTableId+"_processing":null,"class":a.oClasses.sProcessing}).html(a.oLanguage.sProcessing).insertBefore(a.nTable)[0]}function C(a,b){a.oFeatures.bProcessing&&h(a.aanFeatures.r).css("display",b?"block":"none");w(a,null,"processing",[a,b])}function rb(a){var b=h(a.nTable);b.attr("role",
"grid");var c=a.oScroll;if(""===c.sX&&""===c.sY)return a.nTable;var d=c.sX,e=c.sY,f=a.oClasses,g=b.children("caption"),i=g.length?g[0]._captionSide:null,j=h(b[0].cloneNode(!1)),n=h(b[0].cloneNode(!1)),l=b.children("tfoot");c.sX&&"100%"===b.attr("width")&&b.removeAttr("width");l.length||(l=null);j=h("<div/>",{"class":f.sScrollWrapper}).append(h("<div/>",{"class":f.sScrollHead}).css({overflow:"hidden",position:"relative",border:0,width:d?!d?null:u(d):"100%"}).append(h("<div/>",{"class":f.sScrollHeadInner}).css({"box-sizing":"content-box",
width:c.sXInner||"100%"}).append(j.removeAttr("id").css("margin-left",0).append("top"===i?g:null).append(b.children("thead"))))).append(h("<div/>",{"class":f.sScrollBody}).css({position:"relative",overflow:"auto",width:!d?null:u(d)}).append(b));l&&j.append(h("<div/>",{"class":f.sScrollFoot}).css({overflow:"hidden",border:0,width:d?!d?null:u(d):"100%"}).append(h("<div/>",{"class":f.sScrollFootInner}).append(n.removeAttr("id").css("margin-left",0).append("bottom"===i?g:null).append(b.children("tfoot")))));
var b=j.children(),k=b[0],f=b[1],q=l?b[2]:null;if(d)h(f).on("scroll.DT",function(){var a=this.scrollLeft;k.scrollLeft=a;l&&(q.scrollLeft=a)});h(f).css(e&&c.bCollapse?"max-height":"height",e);a.nScrollHead=k;a.nScrollBody=f;a.nScrollFoot=q;a.aoDrawCallback.push({fn:Z,sName:"scrolling"});return j[0]}function Z(a){var b=a.oScroll,c=b.sX,d=b.sXInner,e=b.sY,b=b.iBarWidth,f=h(a.nScrollHead),g=f[0].style,i=f.children("div"),j=i[0].style,n=i.children("table"),i=a.nScrollBody,l=h(i),k=i.style,q=h(a.nScrollFoot).children("div"),
m=q.children("table"),o=h(a.nTHead),E=h(a.nTable),p=E[0],t=p.style,N=a.nTFoot?h(a.nTFoot):null,Eb=a.oBrowser,w=Eb.bScrollOversize,s,v,O,x,y=[],z=[],A=[],B,C=function(a){a=a.style;a.paddingTop="0";a.paddingBottom="0";a.borderTopWidth="0";a.borderBottomWidth="0";a.height=0};E.children("thead, tfoot").remove();x=o.clone().prependTo(E);o=o.find("tr");v=x.find("tr");x.find("th, td").removeAttr("tabindex");N&&(O=N.clone().prependTo(E),s=N.find("tr"),O=O.find("tr"));c||(k.width="100%",f[0].style.width="100%");
h.each(qa(a,x),function(b,c){B=$(a,b);c.style.width=a.aoColumns[B].sWidth});N&&H(function(a){a.style.width=""},O);f=E.outerWidth();if(""===c){t.width="100%";if(w&&(E.find("tbody").height()>i.offsetHeight||"scroll"==l.css("overflow-y")))t.width=u(E.outerWidth()-b);f=E.outerWidth()}else""!==d&&(t.width=u(d),f=E.outerWidth());H(C,v);H(function(a){A.push(a.innerHTML);y.push(u(h(a).css("width")))},v);H(function(a,b){a.style.width=y[b]},o);h(v).height(0);N&&(H(C,O),H(function(a){z.push(u(h(a).css("width")))},
O),H(function(a,b){a.style.width=z[b]},s),h(O).height(0));H(function(a,b){a.innerHTML='<div class="dataTables_sizing" style="height:0;overflow:hidden;">'+A[b]+"</div>";a.style.width=y[b]},v);N&&H(function(a,b){a.innerHTML="";a.style.width=z[b]},O);if(E.outerWidth()<f){s=i.scrollHeight>i.offsetHeight||"scroll"==l.css("overflow-y")?f+b:f;if(w&&(i.scrollHeight>i.offsetHeight||"scroll"==l.css("overflow-y")))t.width=u(s-b);(""===c||""!==d)&&J(a,1,"Possible column misalignment",6)}else s="100%";k.width=
u(s);g.width=u(s);N&&(a.nScrollFoot.style.width=u(s));!e&&w&&(k.height=u(p.offsetHeight+b));c=E.outerWidth();n[0].style.width=u(c);j.width=u(c);d=E.height()>i.clientHeight||"scroll"==l.css("overflow-y");e="padding"+(Eb.bScrollbarLeft?"Left":"Right");j[e]=d?b+"px":"0px";N&&(m[0].style.width=u(c),q[0].style.width=u(c),q[0].style[e]=d?b+"px":"0px");l.scroll();if((a.bSorted||a.bFiltered)&&!a._drawHold)i.scrollTop=0}function H(a,b,c){for(var d=0,e=0,f=b.length,g,i;e<f;){g=b[e].firstChild;for(i=c?c[e].firstChild:
null;g;)1===g.nodeType&&(c?a(g,i,d):a(g,d),d++),g=g.nextSibling,i=c?i.nextSibling:null;e++}}function Ha(a){var b=a.nTable,c=a.aoColumns,d=a.oScroll,e=d.sY,f=d.sX,g=d.sXInner,i=c.length,j=aa(a,"bVisible"),n=h("th",a.nTHead),l=b.getAttribute("width"),k=b.parentNode,q=!1,m,o,p;p=a.oBrowser;d=p.bScrollOversize;(m=b.style.width)&&-1!==m.indexOf("%")&&(l=m);for(m=0;m<j.length;m++)o=c[j[m]],null!==o.sWidth&&(o.sWidth=Fb(o.sWidthOrig,k),q=!0);if(d||!q&&!f&&!e&&i==ca(a)&&i==n.length)for(m=0;m<i;m++){if(j=
$(a,m))c[j].sWidth=u(n.eq(m).width())}else{i=h(b).clone().css("visibility","hidden").removeAttr("id");i.find("tbody tr").remove();var t=h("<tr/>").appendTo(i.find("tbody"));i.find("thead, tfoot").remove();i.append(h(a.nTHead).clone()).append(h(a.nTFoot).clone());i.find("tfoot th, tfoot td").css("width","");n=qa(a,i.find("thead")[0]);for(m=0;m<j.length;m++)o=c[j[m]],n[m].style.width=null!==o.sWidthOrig&&""!==o.sWidthOrig?u(o.sWidthOrig):"";if(a.aoData.length)for(m=0;m<j.length;m++)q=j[m],o=c[q],h(Gb(a,
q)).clone(!1).append(o.sContentPadding).appendTo(t);q=h("<div/>").css(f||e?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(i).appendTo(k);f&&g?i.width(g):f?(i.css("width","auto"),i.width()<k.clientWidth&&i.width(k.clientWidth)):e?i.width(k.clientWidth):l&&i.width(l);if(f){for(m=g=0;m<j.length;m++)o=c[j[m]],e=p.bBounding?n[m].getBoundingClientRect().width:h(n[m]).outerWidth(),g+=null===o.sWidthOrig?e:parseInt(o.sWidth,10)+e-h(n[m]).width();i.width(u(g));b.style.width=
u(g)}for(m=0;m<j.length;m++)if(o=c[j[m]],p=h(n[m]).width())o.sWidth=u(p);b.style.width=u(i.css("width"));q.remove()}l&&(b.style.width=u(l));if((l||f)&&!a._reszEvt)b=function(){h(Fa).bind("resize.DT-"+a.sInstance,ua(function(){Y(a)}))},d?setTimeout(b,1E3):b(),a._reszEvt=!0}function ua(a,b){var c=b!==k?b:200,d,e;return function(){var b=this,g=+new Date,i=arguments;d&&g<d+c?(clearTimeout(e),e=setTimeout(function(){d=k;a.apply(b,i)},c)):(d=g,a.apply(b,i))}}function Fb(a,b){if(!a)return 0;var c=h("<div/>").css("width",
u(a)).appendTo(b||T.body),d=c[0].offsetWidth;c.remove();return d}function Gb(a,b){var c=Hb(a,b);if(0>c)return null;var d=a.aoData[c];return!d.nTr?h("<td/>").html(B(a,c,b,"display"))[0]:d.anCells[b]}function Hb(a,b){for(var c,d=-1,e=-1,f=0,g=a.aoData.length;f<g;f++)c=B(a,f,b,"display")+"",c=c.replace($b,""),c.length>d&&(d=c.length,e=f);return e}function u(a){return null===a?"0px":"number"==typeof a?0>a?"0px":a+"px":a.match(/\d$/)?a+"px":a}function V(a){var b,c,d=[],e=a.aoColumns,f,g,i,j;b=a.aaSortingFixed;
c=h.isPlainObject(b);var n=[];f=function(a){a.length&&!h.isArray(a[0])?n.push(a):h.merge(n,a)};h.isArray(b)&&f(b);c&&b.pre&&f(b.pre);f(a.aaSorting);c&&b.post&&f(b.post);for(a=0;a<n.length;a++){j=n[a][0];f=e[j].aDataSort;b=0;for(c=f.length;b<c;b++)g=f[b],i=e[g].sType||"string",n[a]._idx===k&&(n[a]._idx=h.inArray(n[a][1],e[g].asSorting)),d.push({src:j,col:g,dir:n[a][1],index:n[a]._idx,type:i,formatter:m.ext.type.order[i+"-pre"]})}return d}function mb(a){var b,c,d=[],e=m.ext.type.order,f=a.aoData,g=
0,i,j=a.aiDisplayMaster,h;Ia(a);h=V(a);b=0;for(c=h.length;b<c;b++)i=h[b],i.formatter&&g++,Ib(a,i.col);if("ssp"!=y(a)&&0!==h.length){b=0;for(c=j.length;b<c;b++)d[j[b]]=b;g===h.length?j.sort(function(a,b){var c,e,g,i,j=h.length,k=f[a]._aSortData,m=f[b]._aSortData;for(g=0;g<j;g++)if(i=h[g],c=k[i.col],e=m[i.col],c=c<e?-1:c>e?1:0,0!==c)return"asc"===i.dir?c:-c;c=d[a];e=d[b];return c<e?-1:c>e?1:0}):j.sort(function(a,b){var c,g,i,j,k=h.length,m=f[a]._aSortData,p=f[b]._aSortData;for(i=0;i<k;i++)if(j=h[i],
c=m[j.col],g=p[j.col],j=e[j.type+"-"+j.dir]||e["string-"+j.dir],c=j(c,g),0!==c)return c;c=d[a];g=d[b];return c<g?-1:c>g?1:0})}a.bSorted=!0}function Jb(a){for(var b,c,d=a.aoColumns,e=V(a),a=a.oLanguage.oAria,f=0,g=d.length;f<g;f++){c=d[f];var i=c.asSorting;b=c.sTitle.replace(/<.*?>/g,"");var j=c.nTh;j.removeAttribute("aria-sort");c.bSortable&&(0<e.length&&e[0].col==f?(j.setAttribute("aria-sort","asc"==e[0].dir?"ascending":"descending"),c=i[e[0].index+1]||i[0]):c=i[0],b+="asc"===c?a.sSortAscending:
a.sSortDescending);j.setAttribute("aria-label",b)}}function Ua(a,b,c,d){var e=a.aaSorting,f=a.aoColumns[b].asSorting,g=function(a,b){var c=a._idx;c===k&&(c=h.inArray(a[1],f));return c+1<f.length?c+1:b?null:0};"number"===typeof e[0]&&(e=a.aaSorting=[e]);c&&a.oFeatures.bSortMulti?(c=h.inArray(b,D(e,"0")),-1!==c?(b=g(e[c],!0),null===b&&1===e.length&&(b=0),null===b?e.splice(c,1):(e[c][1]=f[b],e[c]._idx=b)):(e.push([b,f[0],0]),e[e.length-1]._idx=0)):e.length&&e[0][0]==b?(b=g(e[0]),e.length=1,e[0][1]=f[b],
e[0]._idx=b):(e.length=0,e.push([b,f[0]]),e[0]._idx=0);R(a);"function"==typeof d&&d(a)}function Oa(a,b,c,d){var e=a.aoColumns[c];Va(b,{},function(b){!1!==e.bSortable&&(a.oFeatures.bProcessing?(C(a,!0),setTimeout(function(){Ua(a,c,b.shiftKey,d);"ssp"!==y(a)&&C(a,!1)},0)):Ua(a,c,b.shiftKey,d))})}function xa(a){var b=a.aLastSort,c=a.oClasses.sSortColumn,d=V(a),e=a.oFeatures,f,g;if(e.bSort&&e.bSortClasses){e=0;for(f=b.length;e<f;e++)g=b[e].src,h(D(a.aoData,"anCells",g)).removeClass(c+(2>e?e+1:3));e=0;
for(f=d.length;e<f;e++)g=d[e].src,h(D(a.aoData,"anCells",g)).addClass(c+(2>e?e+1:3))}a.aLastSort=d}function Ib(a,b){var c=a.aoColumns[b],d=m.ext.order[c.sSortDataType],e;d&&(e=d.call(a.oInstance,a,b,ba(a,b)));for(var f,g=m.ext.type.order[c.sType+"-pre"],i=0,h=a.aoData.length;i<h;i++)if(c=a.aoData[i],c._aSortData||(c._aSortData=[]),!c._aSortData[b]||d)f=d?e[i]:B(a,i,b,"sort"),c._aSortData[b]=g?g(f):f}function ya(a){if(a.oFeatures.bStateSave&&!a.bDestroying){var b={time:+new Date,start:a._iDisplayStart,
length:a._iDisplayLength,order:h.extend(!0,[],a.aaSorting),search:Ab(a.oPreviousSearch),columns:h.map(a.aoColumns,function(b,d){return{visible:b.bVisible,search:Ab(a.aoPreSearchCols[d])}})};w(a,"aoStateSaveParams","stateSaveParams",[a,b]);a.oSavedState=b;a.fnStateSaveCallback.call(a.oInstance,a,b)}}function Kb(a){var b,c,d=a.aoColumns;if(a.oFeatures.bStateSave){var e=a.fnStateLoadCallback.call(a.oInstance,a);if(e&&e.time&&(b=w(a,"aoStateLoadParams","stateLoadParams",[a,e]),-1===h.inArray(!1,b)&&(b=
a.iStateDuration,!(0<b&&e.time<+new Date-1E3*b)&&d.length===e.columns.length))){a.oLoadedState=h.extend(!0,{},e);e.start!==k&&(a._iDisplayStart=e.start,a.iInitDisplayStart=e.start);e.length!==k&&(a._iDisplayLength=e.length);e.order!==k&&(a.aaSorting=[],h.each(e.order,function(b,c){a.aaSorting.push(c[0]>=d.length?[0,c[1]]:c)}));e.search!==k&&h.extend(a.oPreviousSearch,Bb(e.search));b=0;for(c=e.columns.length;b<c;b++){var f=e.columns[b];f.visible!==k&&(d[b].bVisible=f.visible);f.search!==k&&h.extend(a.aoPreSearchCols[b],
Bb(f.search))}w(a,"aoStateLoaded","stateLoaded",[a,e])}}}function za(a){var b=m.settings,a=h.inArray(a,D(b,"nTable"));return-1!==a?b[a]:null}function J(a,b,c,d){c="DataTables warning: "+(a?"table id="+a.sTableId+" - ":"")+c;d&&(c+=". For more information about this error, please see http://datatables.net/tn/"+d);if(b)Fa.console&&console.log&&console.log(c);else if(b=m.ext,b=b.sErrMode||b.errMode,a&&w(a,null,"error",[a,d,c]),"alert"==b)alert(c);else{if("throw"==b)throw Error(c);"function"==typeof b&&
b(a,d,c)}}function F(a,b,c,d){h.isArray(c)?h.each(c,function(c,d){h.isArray(d)?F(a,b,d[0],d[1]):F(a,b,d)}):(d===k&&(d=c),b[c]!==k&&(a[d]=b[c]))}function Lb(a,b,c){var d,e;for(e in b)b.hasOwnProperty(e)&&(d=b[e],h.isPlainObject(d)?(h.isPlainObject(a[e])||(a[e]={}),h.extend(!0,a[e],d)):a[e]=c&&"data"!==e&&"aaData"!==e&&h.isArray(d)?d.slice():d);return a}function Va(a,b,c){h(a).bind("click.DT",b,function(b){a.blur();c(b)}).bind("keypress.DT",b,function(a){13===a.which&&(a.preventDefault(),c(a))}).bind("selectstart.DT",
function(){return!1})}function z(a,b,c,d){c&&a[b].push({fn:c,sName:d})}function w(a,b,c,d){var e=[];b&&(e=h.map(a[b].slice().reverse(),function(b){return b.fn.apply(a.oInstance,d)}));null!==c&&(b=h.Event(c+".dt"),h(a.nTable).trigger(b,d),e.push(b.result));return e}function Sa(a){var b=a._iDisplayStart,c=a.fnDisplayEnd(),d=a._iDisplayLength;b>=c&&(b=c-d);b-=b%d;if(-1===d||0>b)b=0;a._iDisplayStart=b}function Pa(a,b){var c=a.renderer,d=m.ext.renderer[b];return h.isPlainObject(c)&&c[b]?d[c[b]]||d._:"string"===
typeof c?d[c]||d._:d._}function y(a){return a.oFeatures.bServerSide?"ssp":a.ajax||a.sAjaxSource?"ajax":"dom"}function Aa(a,b){var c=[],c=Mb.numbers_length,d=Math.floor(c/2);b<=c?c=W(0,b):a<=d?(c=W(0,c-2),c.push("ellipsis"),c.push(b-1)):(a>=b-1-d?c=W(b-(c-2),b):(c=W(a-d+2,a+d-1),c.push("ellipsis"),c.push(b-1)),c.splice(0,0,"ellipsis"),c.splice(0,0,0));c.DT_el="span";return c}function cb(a){h.each({num:function(b){return Ba(b,a)},"num-fmt":function(b){return Ba(b,a,Wa)},"html-num":function(b){return Ba(b,
a,Ca)},"html-num-fmt":function(b){return Ba(b,a,Ca,Wa)}},function(b,c){v.type.order[b+a+"-pre"]=c;b.match(/^html\-/)&&(v.type.search[b+a]=v.type.search.html)})}function Nb(a){return function(){var b=[za(this[m.ext.iApiIndex])].concat(Array.prototype.slice.call(arguments));return m.ext.internal[a].apply(this,b)}}var m,v,t,p,s,Xa={},Ob=/[\r\n]/g,Ca=/<.*?>/g,ac=/^[\w\+\-]/,bc=/[\w\+\-]$/,Yb=RegExp("(\\/|\\.|\\*|\\+|\\?|\\||\\(|\\)|\\[|\\]|\\{|\\}|\\\\|\\$|\\^|\\-)","g"),Wa=/[',$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfk]/gi,
K=function(a){return!a||!0===a||"-"===a?!0:!1},Pb=function(a){var b=parseInt(a,10);return!isNaN(b)&&isFinite(a)?b:null},Qb=function(a,b){Xa[b]||(Xa[b]=RegExp(va(b),"g"));return"string"===typeof a&&"."!==b?a.replace(/\./g,"").replace(Xa[b],"."):a},Ya=function(a,b,c){var d="string"===typeof a;if(K(a))return!0;b&&d&&(a=Qb(a,b));c&&d&&(a=a.replace(Wa,""));return!isNaN(parseFloat(a))&&isFinite(a)},Rb=function(a,b,c){return K(a)?!0:!(K(a)||"string"===typeof a)?null:Ya(a.replace(Ca,""),b,c)?!0:null},D=function(a,
b,c){var d=[],e=0,f=a.length;if(c!==k)for(;e<f;e++)a[e]&&a[e][b]&&d.push(a[e][b][c]);else for(;e<f;e++)a[e]&&d.push(a[e][b]);return d},ja=function(a,b,c,d){var e=[],f=0,g=b.length;if(d!==k)for(;f<g;f++)a[b[f]][c]&&e.push(a[b[f]][c][d]);else for(;f<g;f++)e.push(a[b[f]][c]);return e},W=function(a,b){var c=[],d;b===k?(b=0,d=a):(d=b,b=a);for(var e=b;e<d;e++)c.push(e);return c},Sb=function(a){for(var b=[],c=0,d=a.length;c<d;c++)a[c]&&b.push(a[c]);return b},pa=function(a){var b=[],c,d,e=a.length,f,g=0;
d=0;a:for(;d<e;d++){c=a[d];for(f=0;f<g;f++)if(b[f]===c)continue a;b.push(c);g++}return b},A=function(a,b,c){a[b]!==k&&(a[c]=a[b])},da=/\[.*?\]$/,U=/\(\)$/,wa=h("<div>")[0],Zb=wa.textContent!==k,$b=/<.*?>/g;m=function(a){this.$=function(a,b){return this.api(!0).$(a,b)};this._=function(a,b){return this.api(!0).rows(a,b).data()};this.api=function(a){return a?new t(za(this[v.iApiIndex])):new t(this)};this.fnAddData=function(a,b){var c=this.api(!0),d=h.isArray(a)&&(h.isArray(a[0])||h.isPlainObject(a[0]))?
c.rows.add(a):c.row.add(a);(b===k||b)&&c.draw();return d.flatten().toArray()};this.fnAdjustColumnSizing=function(a){var b=this.api(!0).columns.adjust(),c=b.settings()[0],d=c.oScroll;a===k||a?b.draw(!1):(""!==d.sX||""!==d.sY)&&Z(c)};this.fnClearTable=function(a){var b=this.api(!0).clear();(a===k||a)&&b.draw()};this.fnClose=function(a){this.api(!0).row(a).child.hide()};this.fnDeleteRow=function(a,b,c){var d=this.api(!0),a=d.rows(a),e=a.settings()[0],h=e.aoData[a[0][0]];a.remove();b&&b.call(this,e,h);
(c===k||c)&&d.draw();return h};this.fnDestroy=function(a){this.api(!0).destroy(a)};this.fnDraw=function(a){this.api(!0).draw(a)};this.fnFilter=function(a,b,c,d,e,h){e=this.api(!0);null===b||b===k?e.search(a,c,d,h):e.column(b).search(a,c,d,h);e.draw()};this.fnGetData=function(a,b){var c=this.api(!0);if(a!==k){var d=a.nodeName?a.nodeName.toLowerCase():"";return b!==k||"td"==d||"th"==d?c.cell(a,b).data():c.row(a).data()||null}return c.data().toArray()};this.fnGetNodes=function(a){var b=this.api(!0);
return a!==k?b.row(a).node():b.rows().nodes().flatten().toArray()};this.fnGetPosition=function(a){var b=this.api(!0),c=a.nodeName.toUpperCase();return"TR"==c?b.row(a).index():"TD"==c||"TH"==c?(a=b.cell(a).index(),[a.row,a.columnVisible,a.column]):null};this.fnIsOpen=function(a){return this.api(!0).row(a).child.isShown()};this.fnOpen=function(a,b,c){return this.api(!0).row(a).child(b,c).show().child()[0]};this.fnPageChange=function(a,b){var c=this.api(!0).page(a);(b===k||b)&&c.draw(!1)};this.fnSetColumnVis=
function(a,b,c){a=this.api(!0).column(a).visible(b);(c===k||c)&&a.columns.adjust().draw()};this.fnSettings=function(){return za(this[v.iApiIndex])};this.fnSort=function(a){this.api(!0).order(a).draw()};this.fnSortListener=function(a,b,c){this.api(!0).order.listener(a,b,c)};this.fnUpdate=function(a,b,c,d,e){var h=this.api(!0);c===k||null===c?h.row(b).data(a):h.cell(b,c).data(a);(e===k||e)&&h.columns.adjust();(d===k||d)&&h.draw();return 0};this.fnVersionCheck=v.fnVersionCheck;var b=this,c=a===k,d=this.length;
c&&(a={});this.oApi=this.internal=v.internal;for(var e in m.ext.internal)e&&(this[e]=Nb(e));this.each(function(){var e={},e=1<d?Lb(e,a,!0):a,g=0,i,j=this.getAttribute("id"),n=!1,l=m.defaults,r=h(this);if("table"!=this.nodeName.toLowerCase())J(null,0,"Non-table node initialisation ("+this.nodeName+")",2);else{db(l);eb(l.column);I(l,l,!0);I(l.column,l.column,!0);I(l,h.extend(e,r.data()));var q=m.settings,g=0;for(i=q.length;g<i;g++){var p=q[g];if(p.nTable==this||p.nTHead.parentNode==this||p.nTFoot&&
p.nTFoot.parentNode==this){g=e.bRetrieve!==k?e.bRetrieve:l.bRetrieve;if(c||g)return p.oInstance;if(e.bDestroy!==k?e.bDestroy:l.bDestroy){p.oInstance.fnDestroy();break}else{J(p,0,"Cannot reinitialise DataTable",3);return}}if(p.sTableId==this.id){q.splice(g,1);break}}if(null===j||""===j)this.id=j="DataTables_Table_"+m.ext._unique++;var o=h.extend(!0,{},m.models.oSettings,{sDestroyWidth:r[0].style.width,sInstance:j,sTableId:j});o.nTable=this;o.oApi=b.internal;o.oInit=e;q.push(o);o.oInstance=1===b.length?
b:r.dataTable();db(e);e.oLanguage&&S(e.oLanguage);e.aLengthMenu&&!e.iDisplayLength&&(e.iDisplayLength=h.isArray(e.aLengthMenu[0])?e.aLengthMenu[0][0]:e.aLengthMenu[0]);e=Lb(h.extend(!0,{},l),e);F(o.oFeatures,e,"bPaginate bLengthChange bFilter bSort bSortMulti bInfo bProcessing bAutoWidth bSortClasses bServerSide bDeferRender".split(" "));F(o,e,["asStripeClasses","ajax","fnServerData","fnFormatNumber","sServerMethod","aaSorting","aaSortingFixed","aLengthMenu","sPaginationType","sAjaxSource","sAjaxDataProp",
"iStateDuration","sDom","bSortCellsTop","iTabIndex","fnStateLoadCallback","fnStateSaveCallback","renderer","searchDelay","rowId",["iCookieDuration","iStateDuration"],["oSearch","oPreviousSearch"],["aoSearchCols","aoPreSearchCols"],["iDisplayLength","_iDisplayLength"],["bJQueryUI","bJUI"]]);F(o.oScroll,e,[["sScrollX","sX"],["sScrollXInner","sXInner"],["sScrollY","sY"],["bScrollCollapse","bCollapse"]]);F(o.oLanguage,e,"fnInfoCallback");z(o,"aoDrawCallback",e.fnDrawCallback,"user");z(o,"aoServerParams",
e.fnServerParams,"user");z(o,"aoStateSaveParams",e.fnStateSaveParams,"user");z(o,"aoStateLoadParams",e.fnStateLoadParams,"user");z(o,"aoStateLoaded",e.fnStateLoaded,"user");z(o,"aoRowCallback",e.fnRowCallback,"user");z(o,"aoRowCreatedCallback",e.fnCreatedRow,"user");z(o,"aoHeaderCallback",e.fnHeaderCallback,"user");z(o,"aoFooterCallback",e.fnFooterCallback,"user");z(o,"aoInitComplete",e.fnInitComplete,"user");z(o,"aoPreDrawCallback",e.fnPreDrawCallback,"user");o.rowIdFn=P(e.rowId);fb(o);j=o.oClasses;
e.bJQueryUI?(h.extend(j,m.ext.oJUIClasses,e.oClasses),e.sDom===l.sDom&&"lfrtip"===l.sDom&&(o.sDom='<"H"lfr>t<"F"ip>'),o.renderer)?h.isPlainObject(o.renderer)&&!o.renderer.header&&(o.renderer.header="jqueryui"):o.renderer="jqueryui":h.extend(j,m.ext.classes,e.oClasses);r.addClass(j.sTable);o.iInitDisplayStart===k&&(o.iInitDisplayStart=e.iDisplayStart,o._iDisplayStart=e.iDisplayStart);null!==e.iDeferLoading&&(o.bDeferLoading=!0,g=h.isArray(e.iDeferLoading),o._iRecordsDisplay=g?e.iDeferLoading[0]:e.iDeferLoading,
o._iRecordsTotal=g?e.iDeferLoading[1]:e.iDeferLoading);var t=o.oLanguage;h.extend(!0,t,e.oLanguage);""!==t.sUrl&&(h.ajax({dataType:"json",url:t.sUrl,success:function(a){S(a);I(l.oLanguage,a);h.extend(true,t,a);ia(o)},error:function(){ia(o)}}),n=!0);null===e.asStripeClasses&&(o.asStripeClasses=[j.sStripeOdd,j.sStripeEven]);var g=o.asStripeClasses,s=r.children("tbody").find("tr").eq(0);-1!==h.inArray(!0,h.map(g,function(a){return s.hasClass(a)}))&&(h("tbody tr",this).removeClass(g.join(" ")),o.asDestroyStripes=
g.slice());q=[];g=this.getElementsByTagName("thead");0!==g.length&&(fa(o.aoHeader,g[0]),q=qa(o));if(null===e.aoColumns){p=[];g=0;for(i=q.length;g<i;g++)p.push(null)}else p=e.aoColumns;g=0;for(i=p.length;g<i;g++)Ga(o,q?q[g]:null);hb(o,e.aoColumnDefs,p,function(a,b){la(o,a,b)});if(s.length){var u=function(a,b){return a.getAttribute("data-"+b)!==null?b:null};h(s[0]).children("th, td").each(function(a,b){var c=o.aoColumns[a];if(c.mData===a){var d=u(b,"sort")||u(b,"order"),e=u(b,"filter")||u(b,"search");
if(d!==null||e!==null){c.mData={_:a+".display",sort:d!==null?a+".@data-"+d:k,type:d!==null?a+".@data-"+d:k,filter:e!==null?a+".@data-"+e:k};la(o,a)}}})}var v=o.oFeatures;e.bStateSave&&(v.bStateSave=!0,Kb(o,e),z(o,"aoDrawCallback",ya,"state_save"));if(e.aaSorting===k){q=o.aaSorting;g=0;for(i=q.length;g<i;g++)q[g][1]=o.aoColumns[g].asSorting[0]}xa(o);v.bSort&&z(o,"aoDrawCallback",function(){if(o.bSorted){var a=V(o),b={};h.each(a,function(a,c){b[c.src]=c.dir});w(o,null,"order",[o,a,b]);Jb(o)}});z(o,
"aoDrawCallback",function(){(o.bSorted||y(o)==="ssp"||v.bDeferRender)&&xa(o)},"sc");g=r.children("caption").each(function(){this._captionSide=r.css("caption-side")});i=r.children("thead");0===i.length&&(i=h("<thead/>").appendTo(this));o.nTHead=i[0];i=r.children("tbody");0===i.length&&(i=h("<tbody/>").appendTo(this));o.nTBody=i[0];i=r.children("tfoot");if(0===i.length&&0<g.length&&(""!==o.oScroll.sX||""!==o.oScroll.sY))i=h("<tfoot/>").appendTo(this);0===i.length||0===i.children().length?r.addClass(j.sNoFooter):
0<i.length&&(o.nTFoot=i[0],fa(o.aoFooter,o.nTFoot));if(e.aaData)for(g=0;g<e.aaData.length;g++)L(o,e.aaData[g]);else(o.bDeferLoading||"dom"==y(o))&&ma(o,h(o.nTBody).children("tr"));o.aiDisplay=o.aiDisplayMaster.slice();o.bInitialised=!0;!1===n&&ia(o)}});b=null;return this};var Tb=[],x=Array.prototype,cc=function(a){var b,c,d=m.settings,e=h.map(d,function(a){return a.nTable});if(a){if(a.nTable&&a.oApi)return[a];if(a.nodeName&&"table"===a.nodeName.toLowerCase())return b=h.inArray(a,e),-1!==b?[d[b]]:
null;if(a&&"function"===typeof a.settings)return a.settings().toArray();"string"===typeof a?c=h(a):a instanceof h&&(c=a)}else return[];if(c)return c.map(function(){b=h.inArray(this,e);return-1!==b?d[b]:null}).toArray()};t=function(a,b){if(!(this instanceof t))return new t(a,b);var c=[],d=function(a){(a=cc(a))&&(c=c.concat(a))};if(h.isArray(a))for(var e=0,f=a.length;e<f;e++)d(a[e]);else d(a);this.context=pa(c);b&&h.merge(this,b);this.selector={rows:null,cols:null,opts:null};t.extend(this,this,Tb)};
// Expose the Api constructor on the DataTable object.
m.Api = t;

// Array-like instance methods of the Api class. Where Array.prototype (x)
// offers a native implementation it is borrowed; otherwise a fallback is used.
h.extend(t.prototype, {
  // True when the result set holds at least one item.
  any: function () {
    return 0 !== this.count();
  },
  concat: x.concat,
  context: [],
  // Number of items in the flattened result set.
  count: function () {
    return this.flatten().length;
  },
  // Calls fn(item, index, api) for every item, with `this` set to the
  // instance. Returns the instance for chaining.
  each: function (fn) {
    for (var i = 0, ien = this.length; i < ien; i++) {
      fn.call(this, this[i], i, this);
    }
    return this;
  },
  // New Api instance limited to the context / result at position idx, or
  // null when there is no such context.
  eq: function (idx) {
    var ctx = this.context;
    return ctx.length > idx ? new t(ctx[idx], this[idx]) : null;
  },
  // New Api instance holding the items for which fn returns truthy.
  filter: function (fn) {
    var out = [];
    if (x.filter) {
      out = x.filter.call(this, fn, this);
    } else {
      for (var i = 0, ien = this.length; i < ien; i++) {
        if (fn.call(this, this[i], i, this)) {
          out.push(this[i]);
        }
      }
    }
    return new t(this.context, out);
  },
  // New Api instance with one level of array nesting removed.
  flatten: function () {
    var out = [];
    return new t(this.context, out.concat.apply(out, this.toArray()));
  },
  join: x.join,
  indexOf: x.indexOf || function (obj, start) {
    for (var i = start || 0, ien = this.length; i < ien; i++) {
      if (this[i] === obj) {
        return i;
      }
    }
    return -1;
  },
  // Runs fn once per table / column(s) / row(s) / cell depending on `type`,
  // and collects every return value that is not undefined.
  //   flatten   - optional leading boolean: flatten the collected results
  //   type      - "table", "columns", "rows", "column", "column-rows",
  //               "row" or "cell"
  //   fn        - callback, invoked with an Api instance for the table as this
  //   alwaysNew - return a new instance even when nothing was collected
  // Returns a new Api instance carrying the results and a copy of the
  // selector info, or `this` when nothing was collected and alwaysNew is falsy.
  iterator: function (flatten, type, fn, alwaysNew) {
    var results = [],
      ret, i, ien, j, jen,
      context = this.context,
      rows, items,
      selector = this.selector;

    // The flatten argument is optional - shift the arguments when omitted.
    if ("string" === typeof flatten) {
      alwaysNew = fn;
      fn = type;
      type = flatten;
      flatten = false;
    }

    for (i = 0, ien = context.length; i < ien; i++) {
      var apiInst = new t(context[i]);

      if ("table" === type) {
        ret = fn.call(apiInst, context[i], i);
        if (ret !== k) {
          results.push(ret);
        }
      } else if ("columns" === type || "rows" === type) {
        ret = fn.call(apiInst, context[i], this[i], i);
        if (ret !== k) {
          results.push(ret);
        }
      } else if ("column" === type || "column-rows" === type || "row" === type || "cell" === type) {
        items = this[i];
        if ("column-rows" === type) {
          rows = Da(context[i], selector.opts);
        }
        for (j = 0, jen = items.length; j < jen; j++) {
          ret = items[j];
          ret = "cell" === type
            ? fn.call(apiInst, context[i], ret.row, ret.column, i, j)
            : fn.call(apiInst, context[i], ret, i, j, rows);
          if (ret !== k) {
            results.push(ret);
          }
        }
      }
    }

    if (results.length || alwaysNew) {
      var api = new t(context, flatten ? results.concat.apply([], results) : results);
      var apiSelector = api.selector;
      apiSelector.rows = selector.rows;
      apiSelector.cols = selector.cols;
      apiSelector.opts = selector.opts;
      return api;
    }
    return this;
  },
  // Fallback for environments without Array.prototype.lastIndexOf.
  // The previous fallback called this.toArray.reverse() - toArray was never
  // invoked, so it threw a TypeError, and searching a reversed copy would
  // have produced the wrong index anyway. This version scans backwards and
  // follows the native fromIndex rules (negative values count from the end).
  lastIndexOf: x.lastIndexOf || function (obj, start) {
    var len = this.length >>> 0;
    var from = len - 1;
    if (arguments.length > 1) {
      from = Number(start) || 0;
      from = from < 0 ? Math.ceil(from) : Math.floor(from);
      from = from < 0 ? len + from : Math.min(from, len - 1);
    }
    for (var i = from; i >= 0; i--) {
      if (this[i] === obj) {
        return i;
      }
    }
    return -1;
  },
  length: 0,
  // New Api instance holding fn(item, index) for every item.
  map: function (fn) {
    var out = [];
    if (x.map) {
      out = x.map.call(this, fn, this);
    } else {
      for (var i = 0, ien = this.length; i < ien; i++) {
        out.push(fn.call(this, this[i], i));
      }
    }
    return new t(this.context, out);
  },
  // New Api instance holding property `prop` of every item.
  pluck: function (prop) {
    return this.map(function (item) {
      return item[prop];
    });
  },
  pop: x.pop,
  push: x.push,
  reduce: x.reduce || function (fn, init) {
    return gb(this, fn, init, 0, this.length, 1);
  },
  reduceRight: x.reduceRight || function (fn, init) {
    return gb(this, fn, init, this.length - 1, -1, -1);
  },
  reverse: x.reverse,
  selector: null,
  shift: x.shift,
  sort: x.sort,
  splice: x.splice,
  // Plain array copy of the result set.
  toArray: function () {
    return x.slice.call(this);
  },
  // jQuery object wrapping the result set.
  to$: function () {
    return h(this);
  },
  toJQuery: function () {
    return h(this);
  },
  // New Api instance with duplicate items removed.
  unique: function () {
    return new t(this.context, pa(this));
  },
  unshift: x.unshift
});

// t.extend(scope, obj, ext): attaches the registered methods / properties in
// `ext` (entries of {name, val, methodExt, propExt}) to `obj`. Function values
// are wrapped so they run with `scope` as this and so their return value is
// extended with the entry's methodExt. Does nothing unless obj is an Api
// instance or something previously attached by this function.
t.extend = function (scope, obj, ext) {
  if (!ext.length || !obj || !(obj instanceof t || obj.__dt_wrapper)) {
    return;
  }

  var i, ien, struct,
    methodScoping = function (scope, fn, struc) {
      return function () {
        var ret = fn.apply(scope, arguments);
        t.extend(ret, ret, struc.methodExt);
        return ret;
      };
    };

  for (i = 0, ien = ext.length; i < ien; i++) {
    struct = ext[i];
    obj[struct.name] = "function" === typeof struct.val
      ? methodScoping(scope, struct.val, struct)
      : h.isPlainObject(struct.val) ? {} : struct.val;
    obj[struct.name].__dt_wrapper = true;
    t.extend(scope, obj[struct.name], struct.propExt);
  }
};

// t.register(name, val) - also available as p: registers an API method under
// a dotted name such as "ajax.url().load()". Each path part is looked up (or
// created) in the registry Tb; a part written with "()" continues in that
// entry's methodExt list, a plain part in its propExt list. `name` may be an
// array, in which case every name is registered with the same value.
t.register = p = function (name, val) {
  if (h.isArray(name)) {
    for (var n = 0, nen = name.length; n < nen; n++) {
      t.register(name[n], val);
    }
    return;
  }

  var heir = name.split("."),
    struct = Tb,
    key, method;

  // Finds the registry entry called `entryName` in list `src`, or null.
  var find = function (src, entryName) {
    for (var idx = 0, len = src.length; idx < len; idx++) {
      if (src[idx].name === entryName) {
        return src[idx];
      }
    }
    return null;
  };

  for (var i = 0, ien = heir.length; i < ien; i++) {
    method = -1 !== heir[i].indexOf("()");
    key = method ? heir[i].replace("()", "") : heir[i];

    var src = find(struct, key);
    if (!src) {
      src = { name: key, val: {}, methodExt: [], propExt: [] };
      struct.push(src);
    }

    if (i === ien - 1) {
      src.val = val;
    } else {
      struct = method ? src.methodExt : src.propExt;
    }
  }
};

// t.registerPlural(pluralName, singularName, val) - also available as s:
// registers val under the plural name, and under the singular name a wrapper
// that unwraps the result: `this` is returned as is; an Api result yields its
// first item (re-wrapped in an Api instance when that item is an array) or
// undefined when empty; anything else is returned unchanged.
t.registerPlural = s = function (pluralName, singularName, val) {
  t.register(pluralName, val);
  t.register(singularName, function () {
    var ret = val.apply(this, arguments);
    if (ret === this) {
      return this;
    }
    if (ret instanceof t) {
      if (!ret.length) {
        return k;
      }
      return h.isArray(ret[0]) ? new t(ret.context, ret[0]) : ret[0];
    }
    return ret;
  });
};

// tables(selector): Api instance limited to the selected tables. The selector
// is a context index (number) or a jQuery selector matched against the table
// nodes. A falsy selector returns the instance itself.
p("tables()", function (selector) {
  if (!selector) {
    return this;
  }
  var ctx = this.context;
  var picked;
  if ("number" === typeof selector) {
    picked = [ctx[selector]];
  } else {
    var nodes = h.map(ctx, function (settings) {
      return settings.nTable;
    });
    picked = h(nodes).filter(selector).map(function () {
      var idx = h.inArray(this, nodes);
      return ctx[idx];
    }).toArray();
  }
  return new t(picked);
});

// table(selector): like tables(), reduced to the first matching table.
p("table()", function (selector) {
  var tables = this.tables(selector),
    ctx = tables.context;
  return ctx.length ? new t(ctx[0]) : tables;
});

// Node accessors: <table>, <tbody>, <thead>, <tfoot> and wrapper element.
s("tables().nodes()", "table().node()", function () {
  return this.iterator("table", function (settings) {
    return settings.nTable;
  }, 1);
});
s("tables().body()", "table().body()", function () {
  return this.iterator("table", function (settings) {
    return settings.nTBody;
  }, 1);
});
s("tables().header()", "table().header()", function () {
  return this.iterator("table", function (settings) {
    return settings.nTHead;
  }, 1);
});
s("tables().footer()", "table().footer()", function () {
  return this.iterator("table", function (settings) {
    return settings.nTFoot;
  }, 1);
});
s("tables().containers()", "table().container()", function () {
  return this.iterator("table", function (settings) {
    return settings.nTableWrapper;
  }, 1);
});

// draw(paging): redraws every table in the context. "page" redraws via M();
// any other string is converted to a boolean ("full-hold" -> false, everything
// else -> true) and R() is called with holdPosition = (paging === false).
p("draw()", function (paging) {
  return this.iterator("table", function (settings) {
    if ("page" === paging) {
      M(settings);
    } else {
      if ("string" === typeof paging) {
        paging = "full-hold" === paging ? false : true;
      }
      R(settings, false === paging);
    }
  });
});

// page(action): getter (no argument) for the current page index, or setter
// that forwards the action to Ta() for every table.
p("page()", function (action) {
  if (action === k) {
    return this.page.info().page;
  }
  return this.iterator("table", function (settings) {
    Ta(settings, action);
  });
});

// page.info(): paging summary for the first table in the context, or
// undefined when the context is empty. A page length of -1 is reported as a
// single page (page 0 of 1).
p("page.info()", function () {
  if (0 === this.context.length) {
    return k;
  }
  var settings = this.context[0],
    start = settings._iDisplayStart,
    len = settings._iDisplayLength,
    visRecords = settings.fnRecordsDisplay(),
    all = -1 === len;
  return {
    page: all ? 0 : Math.floor(start / len),
    pages: all ? 1 : Math.ceil(visRecords / len),
    start: start,
    end: settings.fnDisplayEnd(),
    length: len,
    recordsTotal: settings.fnRecordsTotal(),
    recordsDisplay: visRecords,
    serverSide: "ssp" === y(settings)
  };
});
// page.len(len): with no argument returns the page length of the first table
// in the context (undefined for an empty context); otherwise applies the new
// length to every table through Ra().
p("page.len()", function (len) {
  if (len !== k) {
    return this.iterator("table", function (settings) {
      Ra(settings, len);
    });
  }
  if (0 === this.context.length) {
    return k;
  }
  return this.context[0]._iDisplayLength;
});

// Ub(settings, holdPosition, callback): reloads the data of one table.
// A callback, when given, is run once after the next "draw" with the latest
// JSON. Server-side tables simply redraw. Otherwise the processing indicator
// is shown, any request still in flight is aborted, and the data is fetched
// again through ra(); the table is then cleared, refilled and redrawn.
var Ub = function (settings, holdPosition, callback) {
  if (callback) {
    var api = new t(settings);
    api.one("draw", function () {
      callback(api.ajax.json());
    });
  }

  if ("ssp" == y(settings)) {
    R(settings, holdPosition);
    return;
  }

  C(settings, true);

  // readyState 4 means the request has already completed.
  var xhr = settings.jqXHR;
  if (xhr && 4 !== xhr.readyState) {
    xhr.abort();
  }

  ra(settings, [], function (json) {
    na(settings);
    var rows = sa(settings, json);
    for (var i = 0, ien = rows.length; i < ien; i++) {
      L(settings, rows[i]);
    }
    R(settings, holdPosition);
    C(settings, false);
  });
};

// ajax.json(): the `json` property of the first table's settings, if any.
p("ajax.json()", function () {
  var ctx = this.context;
  if (ctx.length > 0) {
    return ctx[0].json;
  }
});

// ajax.params(): the `oAjaxData` property of the first table's settings, if any.
p("ajax.params()", function () {
  var ctx = this.context;
  if (ctx.length > 0) {
    return ctx[0].oAjaxData;
  }
});

// ajax.reload(callback, resetPaging): reloads every table in the context.
// The paging position is held only when resetPaging is exactly false.
p("ajax.reload()", function (callback, resetPaging) {
  return this.iterator("table", function (settings) {
    Ub(settings, false === resetPaging, callback);
  });
});

// ajax.url(url): getter (no argument) for the first table's data URL -
// ajax.url, ajax itself, or sAjaxSource - or setter applied to every table.
p("ajax.url()", function (url) {
  var ctx = this.context;

  if (url !== k) {
    return this.iterator("table", function (settings) {
      if (h.isPlainObject(settings.ajax)) {
        settings.ajax.url = url;
      } else {
        settings.ajax = url;
      }
    });
  }

  if (0 === ctx.length) {
    return k;
  }
  var first = ctx[0];
  if (!first.ajax) {
    return first.sAjaxSource;
  }
  return h.isPlainObject(first.ajax) ? first.ajax.url : first.ajax;
});

// ajax.url().load(callback, resetPaging): same reload as ajax.reload().
p("ajax.url().load()", function (callback, resetPaging) {
  return this.iterator("table", function (settings) {
    Ub(settings, false === resetPaging, callback);
  });
});
var Za=function(a,b,c,d,e){var f=[],g,i,j,n,l,m;j=typeof b;if(!b||"string"===j||"function"===j||b.length===k)b=[b];j=0;for(n=b.length;j<n;j++){i=b[j]&&b[j].split?b[j].split(","):[b[j]];l=0;for(m=i.length;l<m;l++)(g=c("string"===typeof i[l]?h.trim(i[l]):i[l]))&&g.length&&(f=f.concat(g))}a=v.selector[a];if(a.length){j=0;for(n=a.length;j<n;j++)f=a[j](d,e,f)}return pa(f)},$a=function(a){a||(a={});a.filter&&a.search===k&&(a.search=a.filter);return h.extend({search:"none",order:"current",page:"all"},a)},
ab=function(a){for(var b=0,c=a.length;b<c;b++)if(0<a[b].length)return a[0]=a[b],a[0].length=1,a.length=1,a.context=[a.context[b]],a;a.length=0;return a},Da=function(a,b){var c,d,e,f=[],g=a.aiDisplay;c=a.aiDisplayMaster;var i=b.search;d=b.order;e=b.page;if("ssp"==y(a))return"removed"===i?[]:W(0,c.length);if("current"==e){c=a._iDisplayStart;for(d=a.fnDisplayEnd();c<d;c++)f.push(g[c])}else if("current"==d||"applied"==d)f="none"==i?c.slice():"applied"==i?g.slice():h.map(c,function(a){return-1===h.inArray(a,
g)?a:null});else if("index"==d||"original"==d){c=0;for(d=a.aoData.length;c<d;c++)"none"==i?f.push(c):(e=h.inArray(c,g),(-1===e&&"removed"==i||0<=e&&"applied"==i)&&f.push(c))}return f};p("rows()",function(a,b){a===k?a="":h.isPlainObject(a)&&(b=a,a="");var b=$a(b),c=this.iterator("table",function(c){var e=b;return Za("row",a,function(a){var b=Pb(a);if(b!==null&&!e)return[b];var i=Da(c,e);if(b!==null&&h.inArray(b,i)!==-1)return[b];if(!a)return i;if(typeof a==="function")return h.map(i,function(b){var e=
c.aoData[b];return a(b,e._aData,e.nTr)?b:null});b=Sb(ja(c.aoData,i,"nTr"));if(a.nodeName&&h.inArray(a,b)!==-1)return[a._DT_RowIndex];if(typeof a==="string"&&a.charAt(0)==="#"){i=c.aIds[a.replace(/^#/,"")];if(i!==k)return[i.idx]}return h(b).filter(a).map(function(){return this._DT_RowIndex}).toArray()},c,e)},1);c.selector.rows=a;c.selector.opts=b;return c});p("rows().nodes()",function(){return this.iterator("row",function(a,b){return a.aoData[b].nTr||k},1)});p("rows().data()",function(){return this.iterator(!0,
"rows",function(a,b){return ja(a.aoData,b,"_aData")},1)});s("rows().cache()","row().cache()",function(a){return this.iterator("row",function(b,c){var d=b.aoData[c];return"search"===a?d._aFilterData:d._aSortData},1)});s("rows().invalidate()","row().invalidate()",function(a){return this.iterator("row",function(b,c){ea(b,c,a)})});s("rows().indexes()","row().index()",function(){return this.iterator("row",function(a,b){return b},1)});s("rows().ids()","row().id()",function(a){for(var b=[],c=this.context,
d=0,e=c.length;d<e;d++)for(var f=0,g=this[d].length;f<g;f++){var h=c[d].rowIdFn(c[d].aoData[this[d][f]]._aData);b.push((!0===a?"#":"")+h)}return new t(c,b)});s("rows().remove()","row().remove()",function(){var a=this;this.iterator("row",function(b,c,d){var e=b.aoData,f=e[c];e.splice(c,1);for(var g=0,h=e.length;g<h;g++)null!==e[g].nTr&&(e[g].nTr._DT_RowIndex=g);oa(b.aiDisplayMaster,c);oa(b.aiDisplay,c);oa(a[d],c,!1);Sa(b);c=b.rowIdFn(f._aData);c!==k&&delete b.aIds[c]});this.iterator("table",function(a){for(var c=
0,d=a.aoData.length;c<d;c++)a.aoData[c].idx=c});return this});p("rows.add()",function(a){var b=this.iterator("table",function(b){var c,f,g,h=[];f=0;for(g=a.length;f<g;f++)c=a[f],c.nodeName&&"TR"===c.nodeName.toUpperCase()?h.push(ma(b,c)[0]):h.push(L(b,c));return h},1),c=this.rows(-1);c.pop();h.merge(c,b);return c});p("row()",function(a,b){return ab(this.rows(a,b))});p("row().data()",function(a){var b=this.context;if(a===k)return b.length&&this.length?b[0].aoData[this[0]]._aData:k;b[0].aoData[this[0]]._aData=
a;ea(b[0],this[0],"data");return this});p("row().node()",function(){var a=this.context;return a.length&&this.length?a[0].aoData[this[0]].nTr||null:null});p("row.add()",function(a){a instanceof h&&a.length&&(a=a[0]);var b=this.iterator("table",function(b){return a.nodeName&&"TR"===a.nodeName.toUpperCase()?ma(b,a)[0]:L(b,a)});return this.row(b[0])});var bb=function(a,b){var c=a.context;if(c.length&&(c=c[0].aoData[b!==k?b:a[0]])&&c._details)c._details.remove(),c._detailsShow=k,c._details=k},Vb=function(a,
b){var c=a.context;if(c.length&&a.length){var d=c[0].aoData[a[0]];if(d._details){(d._detailsShow=b)?d._details.insertAfter(d.nTr):d._details.detach();var e=c[0],f=new t(e),g=e.aoData;f.off("draw.dt.DT_details column-visibility.dt.DT_details destroy.dt.DT_details");0<D(g,"_details").length&&(f.on("draw.dt.DT_details",function(a,b){e===b&&f.rows({page:"current"}).eq(0).each(function(a){a=g[a];a._detailsShow&&a._details.insertAfter(a.nTr)})}),f.on("column-visibility.dt.DT_details",function(a,b){if(e===
b)for(var c,d=ca(b),f=0,h=g.length;f<h;f++)c=g[f],c._details&&c._details.children("td[colspan]").attr("colspan",d)}),f.on("destroy.dt.DT_details",function(a,b){if(e===b)for(var c=0,d=g.length;c<d;c++)g[c]._details&&bb(f,c)}))}}};p("row().child()",function(a,b){var c=this.context;if(a===k)return c.length&&this.length?c[0].aoData[this[0]]._details:k;if(!0===a)this.child.show();else if(!1===a)bb(this);else if(c.length&&this.length){var d=c[0],c=c[0].aoData[this[0]],e=[],f=function(a,b){if(h.isArray(a)||
a instanceof h)for(var c=0,k=a.length;c<k;c++)f(a[c],b);else a.nodeName&&"tr"===a.nodeName.toLowerCase()?e.push(a):(c=h("<tr><td/></tr>").addClass(b),h("td",c).addClass(b).html(a)[0].colSpan=ca(d),e.push(c[0]))};f(a,b);c._details&&c._details.remove();c._details=h(e);c._detailsShow&&c._details.insertAfter(c.nTr)}return this});p(["row().child.show()","row().child().show()"],function(){Vb(this,!0);return this});p(["row().child.hide()","row().child().hide()"],function(){Vb(this,!1);return this});p(["row().child.remove()",
"row().child().remove()"],function(){bb(this);return this});p("row().child.isShown()",function(){var a=this.context;return a.length&&this.length?a[0].aoData[this[0]]._detailsShow||!1:!1});var dc=/^(.+):(name|visIdx|visible)$/,Wb=function(a,b,c,d,e){for(var c=[],d=0,f=e.length;d<f;d++)c.push(B(a,e[d],b));return c};p("columns()",function(a,b){a===k?a="":h.isPlainObject(a)&&(b=a,a="");var b=$a(b),c=this.iterator("table",function(c){var e=a,f=b,g=c.aoColumns,i=D(g,"sName"),j=D(g,"nTh");return Za("column",
e,function(a){var b=Pb(a);if(a==="")return W(g.length);if(b!==null)return[b>=0?b:g.length+b];if(typeof a==="function"){var e=Da(c,f);return h.map(g,function(b,f){return a(f,Wb(c,f,0,0,e),j[f])?f:null})}var k=typeof a==="string"?a.match(dc):"";if(k)switch(k[2]){case "visIdx":case "visible":b=parseInt(k[1],10);if(b<0){var m=h.map(g,function(a,b){return a.bVisible?b:null});return[m[m.length+b]]}return[$(c,b)];case "name":return h.map(i,function(a,b){return a===k[1]?b:null})}else return h(j).filter(a).map(function(){return h.inArray(this,
j)}).toArray()},c,f)},1);c.selector.cols=a;c.selector.opts=b;return c});s("columns().header()","column().header()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].nTh},1)});s("columns().footer()","column().footer()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].nTf},1)});s("columns().data()","column().data()",function(){return this.iterator("column-rows",Wb,1)});s("columns().dataSrc()","column().dataSrc()",function(){return this.iterator("column",
function(a,b){return a.aoColumns[b].mData},1)});s("columns().cache()","column().cache()",function(a){return this.iterator("column-rows",function(b,c,d,e,f){return ja(b.aoData,f,"search"===a?"_aFilterData":"_aSortData",c)},1)});s("columns().nodes()","column().nodes()",function(){return this.iterator("column-rows",function(a,b,c,d,e){return ja(a.aoData,e,"anCells",b)},1)});s("columns().visible()","column().visible()",function(a,b){return this.iterator("column",function(c,d){if(a===k)return c.aoColumns[d].bVisible;
var e=c.aoColumns,f=e[d],g=c.aoData,i,j,m;if(a!==k&&f.bVisible!==a){if(a){var l=h.inArray(!0,D(e,"bVisible"),d+1);i=0;for(j=g.length;i<j;i++)m=g[i].nTr,e=g[i].anCells,m&&m.insertBefore(e[d],e[l]||null)}else h(D(c.aoData,"anCells",d)).detach();f.bVisible=a;ga(c,c.aoHeader);ga(c,c.aoFooter);if(b===k||b)Y(c),(c.oScroll.sX||c.oScroll.sY)&&Z(c);w(c,null,"column-visibility",[c,d,a]);ya(c)}})});s("columns().indexes()","column().index()",function(a){return this.iterator("column",function(b,c){return"visible"===
a?ba(b,c):c},1)});p("columns.adjust()",function(){return this.iterator("table",function(a){Y(a)},1)});p("column.index()",function(a,b){if(0!==this.context.length){var c=this.context[0];if("fromVisible"===a||"toData"===a)return $(c,b);if("fromData"===a||"toVisible"===a)return ba(c,b)}});p("column()",function(a,b){return ab(this.columns(a,b))});p("cells()",function(a,b,c){h.isPlainObject(a)&&(a.row===k?(c=a,a=null):(c=b,b=null));h.isPlainObject(b)&&(c=b,b=null);if(null===b||b===k)return this.iterator("table",
function(b){var d=a,e=$a(c),f=b.aoData,g=Da(b,e),i=Sb(ja(f,g,"anCells")),j=h([].concat.apply([],i)),l,m=b.aoColumns.length,n,p,t,s,u,v;return Za("cell",d,function(a){var c=typeof a==="function";if(a===null||a===k||c){n=[];p=0;for(t=g.length;p<t;p++){l=g[p];for(s=0;s<m;s++){u={row:l,column:s};if(c){v=f[l];a(u,B(b,l,s),v.anCells?v.anCells[s]:null)&&n.push(u)}else n.push(u)}}return n}return h.isPlainObject(a)?[a]:j.filter(a).map(function(a,b){if(b.parentNode)l=b.parentNode._DT_RowIndex;else{a=0;for(t=
f.length;a<t;a++)if(h.inArray(b,f[a].anCells)!==-1){l=a;break}}return{row:l,column:h.inArray(b,f[l].anCells)}}).toArray()},b,e)});var d=this.columns(b,c),e=this.rows(a,c),f,g,i,j,m,l=this.iterator("table",function(a,b){f=[];g=0;for(i=e[b].length;g<i;g++){j=0;for(m=d[b].length;j<m;j++)f.push({row:e[b][g],column:d[b][j]})}return f},1);h.extend(l.selector,{cols:b,rows:a,opts:c});return l});s("cells().nodes()","cell().node()",function(){return this.iterator("cell",function(a,b,c){return(a=a.aoData[b].anCells)?
a[c]:k},1)});p("cells().data()",function(){return this.iterator("cell",function(a,b,c){return B(a,b,c)},1)});s("cells().cache()","cell().cache()",function(a){a="search"===a?"_aFilterData":"_aSortData";return this.iterator("cell",function(b,c,d){return b.aoData[c][a][d]},1)});s("cells().render()","cell().render()",function(a){return this.iterator("cell",function(b,c,d){return B(b,c,d,a)},1)});s("cells().indexes()","cell().index()",function(){return this.iterator("cell",function(a,b,c){return{row:b,
column:c,columnVisible:ba(a,c)}},1)});s("cells().invalidate()","cell().invalidate()",function(a){return this.iterator("cell",function(b,c,d){ea(b,c,a,d)})});p("cell()",function(a,b,c){return ab(this.cells(a,b,c))});p("cell().data()",function(a){var b=this.context,c=this[0];if(a===k)return b.length&&c.length?B(b[0],c[0].row,c[0].column):k;ib(b[0],c[0].row,c[0].column,a);ea(b[0],c[0].row,"data",c[0].column);return this});p("order()",function(a,b){var c=this.context;if(a===k)return 0!==c.length?c[0].aaSorting:
k;"number"===typeof a?a=[[a,b]]:h.isArray(a[0])||(a=Array.prototype.slice.call(arguments));return this.iterator("table",function(b){b.aaSorting=a.slice()})});p("order.listener()",function(a,b,c){return this.iterator("table",function(d){Oa(d,a,b,c)})});p(["columns().order()","column().order()"],function(a){var b=this;return this.iterator("table",function(c,d){var e=[];h.each(b[d],function(b,c){e.push([c,a])});c.aaSorting=e})});p("search()",function(a,b,c,d){var e=this.context;return a===k?0!==e.length?
e[0].oPreviousSearch.sSearch:k:this.iterator("table",function(e){e.oFeatures.bFilter&&ha(e,h.extend({},e.oPreviousSearch,{sSearch:a+"",bRegex:null===b?!1:b,bSmart:null===c?!0:c,bCaseInsensitive:null===d?!0:d}),1)})});s("columns().search()","column().search()",function(a,b,c,d){return this.iterator("column",function(e,f){var g=e.aoPreSearchCols;if(a===k)return g[f].sSearch;e.oFeatures.bFilter&&(h.extend(g[f],{sSearch:a+"",bRegex:null===b?!1:b,bSmart:null===c?!0:c,bCaseInsensitive:null===d?!0:d}),ha(e,
e.oPreviousSearch,1))})});p("state()",function(){return this.context.length?this.context[0].oSavedState:null});p("state.clear()",function(){return this.iterator("table",function(a){a.fnStateSaveCallback.call(a.oInstance,a,{})})});p("state.loaded()",function(){return this.context.length?this.context[0].oLoadedState:null});p("state.save()",function(){return this.iterator("table",function(a){ya(a)})});m.versionCheck=m.fnVersionCheck=function(a){for(var b=m.version.split("."),a=a.split("."),c,d,e=0,f=
a.length;e<f;e++)if(c=parseInt(b[e],10)||0,d=parseInt(a[e],10)||0,c!==d)return c>d;return!0};m.isDataTable=m.fnIsDataTable=function(a){var b=h(a).get(0),c=!1;h.each(m.settings,function(a,e){var f=e.nScrollHead?h("table",e.nScrollHead)[0]:null,g=e.nScrollFoot?h("table",e.nScrollFoot)[0]:null;if(e.nTable===b||f===b||g===b)c=!0});return c};m.tables=m.fnTables=function(a){var b=!1;h.isPlainObject(a)&&(b=a.api,a=a.visible);var c=h.map(m.settings,function(b){if(!a||a&&h(b.nTable).is(":visible"))return b.nTable});
return b?new t(c):c};m.util={throttle:ua,escapeRegex:va};m.camelToHungarian=I;p("$()",function(a,b){var c=this.rows(b).nodes(),c=h(c);return h([].concat(c.filter(a).toArray(),c.find(a).toArray()))});h.each(["on","one","off"],function(a,b){p(b+"()",function(){var a=Array.prototype.slice.call(arguments);a[0].match(/\.dt\b/)||(a[0]+=".dt");var d=h(this.tables().nodes());d[b].apply(d,a);return this})});p("clear()",function(){return this.iterator("table",function(a){na(a)})});p("settings()",function(){return new t(this.context,
this.context)});p("init()",function(){var a=this.context;return a.length?a[0].oInit:null});p("data()",function(){return this.iterator("table",function(a){return D(a.aoData,"_aData")}).flatten()});p("destroy()",function(a){a=a||!1;return this.iterator("table",function(b){var c=b.nTableWrapper.parentNode,d=b.oClasses,e=b.nTable,f=b.nTBody,g=b.nTHead,i=b.nTFoot,j=h(e),f=h(f),k=h(b.nTableWrapper),l=h.map(b.aoData,function(a){return a.nTr}),p;b.bDestroying=!0;w(b,"aoDestroyCallback","destroy",[b]);a||
(new t(b)).columns().visible(!0);k.unbind(".DT").find(":not(tbody *)").unbind(".DT");h(Fa).unbind(".DT-"+b.sInstance);e!=g.parentNode&&(j.children("thead").detach(),j.append(g));i&&e!=i.parentNode&&(j.children("tfoot").detach(),j.append(i));b.aaSorting=[];b.aaSortingFixed=[];xa(b);h(l).removeClass(b.asStripeClasses.join(" "));h("th, td",g).removeClass(d.sSortable+" "+d.sSortableAsc+" "+d.sSortableDesc+" "+d.sSortableNone);b.bJUI&&(h("th span."+d.sSortIcon+", td span."+d.sSortIcon,g).detach(),h("th, td",
g).each(function(){var a=h("div."+d.sSortJUIWrapper,this);h(this).append(a.contents());a.detach()}));f.children().detach();f.append(l);g=a?"remove":"detach";j[g]();k[g]();!a&&c&&(c.insertBefore(e,b.nTableReinsertBefore),j.css("width",b.sDestroyWidth).removeClass(d.sTable),(p=b.asDestroyStripes.length)&&f.children().each(function(a){h(this).addClass(b.asDestroyStripes[a%p])}));c=h.inArray(b,m.settings);-1!==c&&m.settings.splice(c,1)})});h.each(["column","row","cell"],function(a,b){p(b+"s().every()",
function(a){return this.iterator(b,function(d,e,f,g,h){a.call((new t(d))[b](e,"cell"===b?f:k),e,f,g,h)})})});p("i18n()",function(a,b,c){var d=this.context[0],a=P(a)(d.oLanguage);a===k&&(a=b);c!==k&&h.isPlainObject(a)&&(a=a[c]!==k?a[c]:a._);return a.replace("%d",c)});m.version="1.10.9";m.settings=[];m.models={};m.models.oSearch={bCaseInsensitive:!0,sSearch:"",bRegex:!1,bSmart:!0};m.models.oRow={nTr:null,anCells:null,_aData:[],_aSortData:null,_aFilterData:null,_sFilterRow:null,_sRowStripe:"",src:null,
idx:-1};m.models.oColumn={idx:null,aDataSort:null,asSorting:null,bSearchable:null,bSortable:null,bVisible:null,_sManualType:null,_bAttrSrc:!1,fnCreatedCell:null,fnGetData:null,fnSetData:null,mData:null,mRender:null,nTh:null,nTf:null,sClass:null,sContentPadding:null,sDefaultContent:null,sName:null,sSortDataType:"std",sSortingClass:null,sSortingClassJUI:null,sTitle:null,sType:null,sWidth:null,sWidthOrig:null};m.defaults={aaData:null,aaSorting:[[0,"asc"]],aaSortingFixed:[],ajax:null,aLengthMenu:[10,
25,50,100],aoColumns:null,aoColumnDefs:null,aoSearchCols:[],asStripeClasses:null,bAutoWidth:!0,bDeferRender:!1,bDestroy:!1,bFilter:!0,bInfo:!0,bJQueryUI:!1,bLengthChange:!0,bPaginate:!0,bProcessing:!1,bRetrieve:!1,bScrollCollapse:!1,bServerSide:!1,bSort:!0,bSortMulti:!0,bSortCellsTop:!1,bSortClasses:!0,bStateSave:!1,fnCreatedRow:null,fnDrawCallback:null,fnFooterCallback:null,fnFormatNumber:function(a){return a.toString().replace(/\B(?=(\d{3})+(?!\d))/g,this.oLanguage.sThousands)},fnHeaderCallback:null,
fnInfoCallback:null,fnInitComplete:null,fnPreDrawCallback:null,fnRowCallback:null,fnServerData:null,fnServerParams:null,fnStateLoadCallback:function(a){try{return JSON.parse((-1===a.iStateDuration?sessionStorage:localStorage).getItem("DataTables_"+a.sInstance+"_"+location.pathname))}catch(b){}},fnStateLoadParams:null,fnStateLoaded:null,fnStateSaveCallback:function(a,b){try{(-1===a.iStateDuration?sessionStorage:localStorage).setItem("DataTables_"+a.sInstance+"_"+location.pathname,JSON.stringify(b))}catch(c){}},
fnStateSaveParams:null,iStateDuration:7200,iDeferLoading:null,iDisplayLength:10,iDisplayStart:0,iTabIndex:0,oClasses:{},oLanguage:{oAria:{sSortAscending:": activate to sort column ascending",sSortDescending:": activate to sort column descending"},oPaginate:{sFirst:"First",sLast:"Last",sNext:"Next",sPrevious:"Previous"},sEmptyTable:"No data available in table",sInfo:"Showing _START_ to _END_ of _TOTAL_ entries",sInfoEmpty:"Showing 0 to 0 of 0 entries",sInfoFiltered:"(filtered from _MAX_ total entries)",
sInfoPostFix:"",sDecimal:"",sThousands:",",sLengthMenu:"Show _MENU_ entries",sLoadingRecords:"Loading...",sProcessing:"Processing...",sSearch:"Search:",sSearchPlaceholder:"",sUrl:"",sZeroRecords:"No matching records found"},oSearch:h.extend({},m.models.oSearch),sAjaxDataProp:"data",sAjaxSource:null,sDom:"lfrtip",searchDelay:null,sPaginationType:"simple_numbers",sScrollX:"",sScrollXInner:"",sScrollY:"",sServerMethod:"GET",renderer:null,rowId:"DT_RowId"};X(m.defaults);m.defaults.column={aDataSort:null,
iDataSort:-1,asSorting:["asc","desc"],bSearchable:!0,bSortable:!0,bVisible:!0,fnCreatedCell:null,mData:null,mRender:null,sCellType:"td",sClass:"",sContentPadding:"",sDefaultContent:null,sName:"",sSortDataType:"std",sTitle:null,sType:null,sWidth:null};X(m.defaults.column);m.models.oSettings={oFeatures:{bAutoWidth:null,bDeferRender:null,bFilter:null,bInfo:null,bLengthChange:null,bPaginate:null,bProcessing:null,bServerSide:null,bSort:null,bSortMulti:null,bSortClasses:null,bStateSave:null},oScroll:{bCollapse:null,
iBarWidth:0,sX:null,sXInner:null,sY:null},oLanguage:{fnInfoCallback:null},oBrowser:{bScrollOversize:!1,bScrollbarLeft:!1,bBounding:!1,barWidth:0},ajax:null,aanFeatures:[],aoData:[],aiDisplay:[],aiDisplayMaster:[],aIds:{},aoColumns:[],aoHeader:[],aoFooter:[],oPreviousSearch:{},aoPreSearchCols:[],aaSorting:null,aaSortingFixed:[],asStripeClasses:null,asDestroyStripes:[],sDestroyWidth:0,aoRowCallback:[],aoHeaderCallback:[],aoFooterCallback:[],aoDrawCallback:[],aoRowCreatedCallback:[],aoPreDrawCallback:[],
aoInitComplete:[],aoStateSaveParams:[],aoStateLoadParams:[],aoStateLoaded:[],sTableId:"",nTable:null,nTHead:null,nTFoot:null,nTBody:null,nTableWrapper:null,bDeferLoading:!1,bInitialised:!1,aoOpenRows:[],sDom:null,searchDelay:null,sPaginationType:"two_button",iStateDuration:0,aoStateSave:[],aoStateLoad:[],oSavedState:null,oLoadedState:null,sAjaxSource:null,sAjaxDataProp:null,bAjaxDataGet:!0,jqXHR:null,json:k,oAjaxData:k,fnServerData:null,aoServerParams:[],sServerMethod:null,fnFormatNumber:null,aLengthMenu:null,
iDraw:0,bDrawing:!1,iDrawError:-1,_iDisplayLength:10,_iDisplayStart:0,_iRecordsTotal:0,_iRecordsDisplay:0,bJUI:null,oClasses:{},bFiltered:!1,bSorted:!1,bSortCellsTop:null,oInit:null,aoDestroyCallback:[],fnRecordsTotal:function(){return"ssp"==y(this)?1*this._iRecordsTotal:this.aiDisplayMaster.length},fnRecordsDisplay:function(){return"ssp"==y(this)?1*this._iRecordsDisplay:this.aiDisplay.length},fnDisplayEnd:function(){var a=this._iDisplayLength,b=this._iDisplayStart,c=b+a,d=this.aiDisplay.length,e=
this.oFeatures,f=e.bPaginate;return e.bServerSide?!1===f||-1===a?b+d:Math.min(b+a,this._iRecordsDisplay):!f||c>d||-1===a?d:c},oInstance:null,sInstance:null,iTabIndex:0,nScrollHead:null,nScrollFoot:null,aLastSort:[],oPlugins:{},rowIdFn:null,rowId:null};m.ext=v={buttons:{},classes:{},errMode:"alert",feature:[],search:[],selector:{cell:[],column:[],row:[]},internal:{},legacy:{ajax:null},pager:{},renderer:{pageButton:{},header:{}},order:{},type:{detect:[],search:{},order:{}},_unique:0,fnVersionCheck:m.fnVersionCheck,
iApiIndex:0,oJUIClasses:{},sVersion:m.version};h.extend(v,{afnFiltering:v.search,aTypes:v.type.detect,ofnSearch:v.type.search,oSort:v.type.order,afnSortData:v.order,aoFeatures:v.feature,oApi:v.internal,oStdClasses:v.classes,oPagination:v.pager});h.extend(m.ext.classes,{sTable:"dataTable",sNoFooter:"no-footer",sPageButton:"paginate_button",sPageButtonActive:"current",sPageButtonDisabled:"disabled",sStripeOdd:"odd",sStripeEven:"even",sRowEmpty:"dataTables_empty",sWrapper:"dataTables_wrapper",sFilter:"dataTables_filter",
sInfo:"dataTables_info",sPaging:"dataTables_paginate paging_",sLength:"dataTables_length",sProcessing:"dataTables_processing",sSortAsc:"sorting_asc",sSortDesc:"sorting_desc",sSortable:"sorting",sSortableAsc:"sorting_asc_disabled",sSortableDesc:"sorting_desc_disabled",sSortableNone:"sorting_disabled",sSortColumn:"sorting_",sFilterInput:"",sLengthSelect:"",sScrollWrapper:"dataTables_scroll",sScrollHead:"dataTables_scrollHead",sScrollHeadInner:"dataTables_scrollHeadInner",sScrollBody:"dataTables_scrollBody",
sScrollFoot:"dataTables_scrollFoot",sScrollFootInner:"dataTables_scrollFootInner",sHeaderTH:"",sFooterTH:"",sSortJUIAsc:"",sSortJUIDesc:"",sSortJUI:"",sSortJUIAscAllowed:"",sSortJUIDescAllowed:"",sSortJUIWrapper:"",sSortIcon:"",sJUIHeader:"",sJUIFooter:""});var Ea="",Ea="",G=Ea+"ui-state-default",ka=Ea+"css_right ui-icon ui-icon-",Xb=Ea+"fg-toolbar ui-toolbar ui-widget-header ui-helper-clearfix";h.extend(m.ext.oJUIClasses,m.ext.classes,{sPageButton:"fg-button ui-button "+G,sPageButtonActive:"ui-state-disabled",
sPageButtonDisabled:"ui-state-disabled",sPaging:"dataTables_paginate fg-buttonset ui-buttonset fg-buttonset-multi ui-buttonset-multi paging_",sSortAsc:G+" sorting_asc",sSortDesc:G+" sorting_desc",sSortable:G+" sorting",sSortableAsc:G+" sorting_asc_disabled",sSortableDesc:G+" sorting_desc_disabled",sSortableNone:G+" sorting_disabled",sSortJUIAsc:ka+"triangle-1-n",sSortJUIDesc:ka+"triangle-1-s",sSortJUI:ka+"carat-2-n-s",sSortJUIAscAllowed:ka+"carat-1-n",sSortJUIDescAllowed:ka+"carat-1-s",sSortJUIWrapper:"DataTables_sort_wrapper",
sSortIcon:"DataTables_sort_icon",sScrollHead:"dataTables_scrollHead "+G,sScrollFoot:"dataTables_scrollFoot "+G,sHeaderTH:G,sFooterTH:G,sJUIHeader:Xb+" ui-corner-tl ui-corner-tr",sJUIFooter:Xb+" ui-corner-bl ui-corner-br"});var Mb=m.ext.pager;h.extend(Mb,{simple:function(){return["previous","next"]},full:function(){return["first","previous","next","last"]},numbers:function(a,b){return[Aa(a,b)]},simple_numbers:function(a,b){return["previous",Aa(a,b),"next"]},full_numbers:function(a,b){return["first",
"previous",Aa(a,b),"next","last"]},_numbers:Aa,numbers_length:7});h.extend(!0,m.ext.renderer,{pageButton:{_:function(a,b,c,d,e,f){var g=a.oClasses,i=a.oLanguage.oPaginate,j,k,l=0,m=function(b,d){var p,q,t,s,u=function(b){Ta(a,b.data.action,true)};p=0;for(q=d.length;p<q;p++){s=d[p];if(h.isArray(s)){t=h("<"+(s.DT_el||"div")+"/>").appendTo(b);m(t,s)}else{j=null;k="";switch(s){case "ellipsis":b.append('<span class="ellipsis">&#x2026;</span>');break;case "first":j=i.sFirst;k=s+(e>0?"":" "+g.sPageButtonDisabled);
break;case "previous":j=i.sPrevious;k=s+(e>0?"":" "+g.sPageButtonDisabled);break;case "next":j=i.sNext;k=s+(e<f-1?"":" "+g.sPageButtonDisabled);break;case "last":j=i.sLast;k=s+(e<f-1?"":" "+g.sPageButtonDisabled);break;default:j=s+1;k=e===s?g.sPageButtonActive:""}if(j!==null){t=h("<a>",{"class":g.sPageButton+" "+k,"aria-controls":a.sTableId,"data-dt-idx":l,tabindex:a.iTabIndex,id:c===0&&typeof s==="string"?a.sTableId+"_"+s:null}).html(j).appendTo(b);Va(t,{action:s},u);l++}}}},p;try{p=h(b).find(T.activeElement).data("dt-idx")}catch(t){}m(h(b).empty(),
d);p&&h(b).find("[data-dt-idx="+p+"]").focus()}}});h.extend(m.ext.type.detect,[function(a,b){var c=b.oLanguage.sDecimal;return Ya(a,c)?"num"+c:null},function(a){if(a&&!(a instanceof Date)&&(!ac.test(a)||!bc.test(a)))return null;var b=Date.parse(a);return null!==b&&!isNaN(b)||K(a)?"date":null},function(a,b){var c=b.oLanguage.sDecimal;return Ya(a,c,!0)?"num-fmt"+c:null},function(a,b){var c=b.oLanguage.sDecimal;return Rb(a,c)?"html-num"+c:null},function(a,b){var c=b.oLanguage.sDecimal;return Rb(a,c,
!0)?"html-num-fmt"+c:null},function(a){return K(a)||"string"===typeof a&&-1!==a.indexOf("<")?"html":null}]);h.extend(m.ext.type.search,{html:function(a){return K(a)?a:"string"===typeof a?a.replace(Ob," ").replace(Ca,""):""},string:function(a){return K(a)?a:"string"===typeof a?a.replace(Ob," "):a}});var Ba=function(a,b,c,d){if(0!==a&&(!a||"-"===a))return-Infinity;b&&(a=Qb(a,b));a.replace&&(c&&(a=a.replace(c,"")),d&&(a=a.replace(d,"")));return 1*a};h.extend(v.type.order,{"date-pre":function(a){return Date.parse(a)||
0},"html-pre":function(a){return K(a)?"":a.replace?a.replace(/<.*?>/g,"").toLowerCase():a+""},"string-pre":function(a){return K(a)?"":"string"===typeof a?a.toLowerCase():!a.toString?"":a.toString()},"string-asc":function(a,b){return a<b?-1:a>b?1:0},"string-desc":function(a,b){return a<b?1:a>b?-1:0}});cb("");h.extend(!0,m.ext.renderer,{header:{_:function(a,b,c,d){h(a.nTable).on("order.dt.DT",function(e,f,g,h){if(a===f){e=c.idx;b.removeClass(c.sSortingClass+" "+d.sSortAsc+" "+d.sSortDesc).addClass(h[e]==
"asc"?d.sSortAsc:h[e]=="desc"?d.sSortDesc:c.sSortingClass)}})},jqueryui:function(a,b,c,d){h("<div/>").addClass(d.sSortJUIWrapper).append(b.contents()).append(h("<span/>").addClass(d.sSortIcon+" "+c.sSortingClassJUI)).appendTo(b);h(a.nTable).on("order.dt.DT",function(e,f,g,h){if(a===f){e=c.idx;b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass(h[e]=="asc"?d.sSortAsc:h[e]=="desc"?d.sSortDesc:c.sSortingClass);b.find("span."+d.sSortIcon).removeClass(d.sSortJUIAsc+" "+d.sSortJUIDesc+" "+d.sSortJUI+" "+
d.sSortJUIAscAllowed+" "+d.sSortJUIDescAllowed).addClass(h[e]=="asc"?d.sSortJUIAsc:h[e]=="desc"?d.sSortJUIDesc:c.sSortingClassJUI)}})}}});m.render={number:function(a,b,c,d,e){return{display:function(f){if("number"!==typeof f&&"string"!==typeof f)return f;var g=0>f?"-":"",f=Math.abs(parseFloat(f)),h=parseInt(f,10),f=c?b+(f-h).toFixed(c).substring(2):"";return g+(d||"")+h.toString().replace(/\B(?=(\d{3})+(?!\d))/g,a)+f+(e||"")}}}};h.extend(m.ext.internal,{_fnExternApiFunc:Nb,_fnBuildAjax:ra,_fnAjaxUpdate:lb,
_fnAjaxParameters:ub,_fnAjaxUpdateDraw:vb,_fnAjaxDataSrc:sa,_fnAddColumn:Ga,_fnColumnOptions:la,_fnAdjustColumnSizing:Y,_fnVisibleToColumnIndex:$,_fnColumnIndexToVisible:ba,_fnVisbleColumns:ca,_fnGetColumns:aa,_fnColumnTypes:Ia,_fnApplyColumnDefs:hb,_fnHungarianMap:X,_fnCamelToHungarian:I,_fnLanguageCompat:S,_fnBrowserDetect:fb,_fnAddData:L,_fnAddTr:ma,_fnNodeToDataIndex:function(a,b){return b._DT_RowIndex!==k?b._DT_RowIndex:null},_fnNodeToColumnIndex:function(a,b,c){return h.inArray(c,a.aoData[b].anCells)},
_fnGetCellData:B,_fnSetCellData:ib,_fnSplitObjNotation:La,_fnGetObjectDataFn:P,_fnSetObjectDataFn:Q,_fnGetDataMaster:Ma,_fnClearTable:na,_fnDeleteIndex:oa,_fnInvalidate:ea,_fnGetRowElements:Ka,_fnCreateTr:Ja,_fnBuildHead:kb,_fnDrawHead:ga,_fnDraw:M,_fnReDraw:R,_fnAddOptionsHtml:nb,_fnDetectHeader:fa,_fnGetUniqueThs:qa,_fnFeatureHtmlFilter:pb,_fnFilterComplete:ha,_fnFilterCustom:yb,_fnFilterColumn:xb,_fnFilter:wb,_fnFilterCreateSearch:Qa,_fnEscapeRegex:va,_fnFilterData:zb,_fnFeatureHtmlInfo:sb,_fnUpdateInfo:Cb,
_fnInfoMacros:Db,_fnInitialise:ia,_fnInitComplete:ta,_fnLengthChange:Ra,_fnFeatureHtmlLength:ob,_fnFeatureHtmlPaginate:tb,_fnPageChange:Ta,_fnFeatureHtmlProcessing:qb,_fnProcessingDisplay:C,_fnFeatureHtmlTable:rb,_fnScrollDraw:Z,_fnApplyToChildren:H,_fnCalculateColumnWidths:Ha,_fnThrottle:ua,_fnConvertToWidth:Fb,_fnGetWidestNode:Gb,_fnGetMaxLenString:Hb,_fnStringToCss:u,_fnSortFlatten:V,_fnSort:mb,_fnSortAria:Jb,_fnSortListener:Ua,_fnSortAttachListener:Oa,_fnSortingClasses:xa,_fnSortData:Ib,_fnSaveState:ya,
_fnLoadState:Kb,_fnSettingsFromNode:za,_fnLog:J,_fnMap:F,_fnBindAction:Va,_fnCallbackReg:z,_fnCallbackFire:w,_fnLengthOverflow:Sa,_fnRenderer:Pa,_fnDataSource:y,_fnRowAttributes:Na,_fnCalculateEnd:function(){}});h.fn.dataTable=m;h.fn.dataTableSettings=m.settings;h.fn.dataTableExt=m.ext;h.fn.DataTable=function(a){return h(this).dataTable(a).api()};h.each(m,function(a,b){h.fn.DataTable[a]=b});return h.fn.dataTable};"function"===typeof define&&define.amd?define("datatables",["jquery"],S):"object"===
typeof exports?module.exports=S(require("jquery")):jQuery&&!jQuery.fn.dataTable&&S(jQuery)})(window,document);


/*!
DataTables Bootstrap 3 integration
©2011-2014 SpryMedia Ltd - datatables.net/license
*/
(function (window, document) {
  /**
   * Install the Bootstrap 3 integration on a DataTables build: layout and
   * class defaults, a "bootstrap" pagination-button renderer and, when the
   * TableTools extension is present, Bootstrap class names for it.
   *
   * @param {Function} $ jQuery.
   * @param {Object} DataTable The DataTables static object (`$.fn.dataTable`).
   */
  var factory = function ($, DataTable) {
    // Default table layout and renderer selection.
    $.extend(true, DataTable.defaults, {
      dom: "<'row'<'col-sm-6'l><'col-sm-6'f>><'row'<'col-sm-12'tr>><'row'<'col-sm-5'i><'col-sm-7'p>>",
      renderer: "bootstrap"
    });

    // Class names applied to the wrapper, the filter input and the length select.
    $.extend(DataTable.ext.classes, {
      sWrapper: "dataTables_wrapper form-inline dt-bootstrap",
      sFilterInput: "form-control input-sm",
      sLengthSelect: "form-control input-sm"
    });

    /**
     * Pagination button renderer producing a `<ul class="pagination">` list.
     *
     * @param {Object} settings DataTables settings object for the table.
     * @param {*} host Element (or selector) whose content is replaced by the list.
     * @param {number} idx Renderer index; element ids are only assigned when it is 0.
     * @param {Array} buttons Button descriptors: strings ("first", "previous",
     *   "next", "last", "ellipsis"), zero-based page numbers, or nested arrays.
     * @param {number} page Zero-based index of the current page.
     * @param {number} pages Total number of pages.
     */
    DataTable.ext.renderer.pageButton.bootstrap = function (settings, host, idx, buttons, page, pages) {
      var api = new DataTable.Api(settings);
      var classes = settings.oClasses;
      var lang = settings.oLanguage.oPaginate;
      // Running index written to `data-dt-idx`; shared across nested button groups.
      var counter = 0;
      var activeIdx;

      // Click handler bound to every button; disabled buttons do nothing.
      var clickHandler = function (e) {
        e.preventDefault();
        if (!$(e.currentTarget).hasClass("disabled")) {
          api.page(e.data.action).draw("page");
        }
      };

      // Append one <li><a/></li> per descriptor, recursing into nested arrays.
      var attach = function (container, buttonList) {
        var i, len, button, btnDisplay, btnClass, node;

        for (i = 0, len = buttonList.length; i < len; i++) {
          button = buttonList[i];

          if ($.isArray(button)) {
            attach(container, button);
            continue;
          }

          btnDisplay = "";
          btnClass = "";

          switch (button) {
            case "ellipsis":
              btnDisplay = "&hellip;";
              btnClass = "disabled";
              break;

            case "first":
              btnDisplay = lang.sFirst;
              btnClass = button + (0 < page ? "" : " disabled");
              break;

            case "previous":
              btnDisplay = lang.sPrevious;
              btnClass = button + (0 < page ? "" : " disabled");
              break;

            case "next":
              btnDisplay = lang.sNext;
              btnClass = button + (page < pages - 1 ? "" : " disabled");
              break;

            case "last":
              btnDisplay = lang.sLast;
              btnClass = button + (page < pages - 1 ? "" : " disabled");
              break;

            default:
              // Numeric button: displayed one-based, highlighted when current.
              btnDisplay = button + 1;
              btnClass = page === button ? "active" : "";
          }

          // A falsy label (e.g. an empty language string) suppresses the button.
          if (btnDisplay) {
            node = $("<li>", {
              "class": classes.sPageButton + " " + btnClass,
              id: 0 === idx && "string" === typeof button ? settings.sTableId + "_" + button : null
            }).append($("<a>", {
              href: "#",
              "aria-controls": settings.sTableId,
              "data-dt-idx": counter,
              tabindex: settings.iTabIndex
            }).html(btnDisplay)).appendTo(container);

            settings.oApi._fnBindAction(node, {action: button}, clickHandler);
            counter++;
          }
        }
      };

      // The buttons are destroyed and rebuilt below, so remember which one
      // (if any) had focus in order to restore it afterwards.
      try {
        activeIdx = $(host).find(document.activeElement).data("dt-idx");
      } catch (e) {
        // Deliberately best-effort: a failure here only means focus is not
        // restored. NOTE(review): presumably guards against browsers that
        // throw when reading document.activeElement - confirm.
      }

      attach($(host).empty().html('<ul class="pagination"/>').children("ul"), buttons);

      // Compare against undefined rather than testing truthiness: the first
      // button has index 0, which is falsy, and must still regain focus.
      if (activeIdx !== undefined) {
        $(host).find("[data-dt-idx=" + activeIdx + "]").focus();
      }
    };

    // TableTools class names and tags; applied only when that extension is loaded.
    if (DataTable.TableTools) {
      $.extend(true, DataTable.TableTools.classes, {
        container: "DTTT btn-group",
        buttons: {normal: "btn btn-default", disabled: "disabled"},
        collection: {
          container: "DTTT_dropdown dropdown-menu",
          buttons: {normal: "", disabled: "disabled"}
        },
        print: {info: "DTTT_print_info"},
        select: {row: "active"}
      });

      $.extend(true, DataTable.TableTools.DEFAULTS.oTags, {
        collection: {container: "ul", button: "li", liner: "a"}
      });
    }
  };

  // Module loader detection: AMD, then CommonJS, then the browser global.
  if ("function" === typeof define && define.amd) {
    define(["jquery", "datatables"], factory);
  } else if ("object" === typeof exports) {
    factory(require("jquery"), require("datatables"));
  } else if (jQuery) {
    factory(jQuery, jQuery.fn.dataTable);
  }
})(window, document);


// Store a build descriptor string on the DataTables extension object as `ext.builder`.
// NOTE(review): the value looks like a comma-separated list of the component
// versions bundled into this file (dt-1.10.9 and fh-3.0.0 match the versions
// declared elsewhere in the bundle) - confirm the exact format against the
// tool that generated it.
jQuery.fn.dataTable.ext.builder = "bs-3.3.5\/jqc-1.11.3,dt-1.10.9,fh-3.0.0,r-1.0.7,sc-1.3.0";

/*!
FixedHeader 3.0.0
©2009-2015 SpryMedia Ltd - datatables.net/license
*/
/*
 * FixedHeader extension for DataTables (minified build, locally patched).
 *
 * Structure: an IIFE receiving (window, document) that defines a factory
 * g($, DataTable) and invokes it through AMD `define`, CommonJS `require`, or
 * the jQuery global (the latter only when jQuery.fn.dataTable.FixedHeader is
 * not already defined).
 *
 * The factory provides:
 *  - FixedHeader(dt, config): constructor. Throws a string when called without
 *    `new`, and when the table's settings object already has `_fixedHeader`.
 *    `config === true` is treated as `{}`; options are merged over
 *    FixedHeader.defaults ({header:true, footer:false, headerOffset:0,
 *    footerOffset:0}).
 *  - prototype.update(): recomputes positions and forces a re-evaluation of the
 *    header/footer mode.
 *  - Header/footer modes set by _modeChange: "in-place", "in", "below", "above".
 *  - API registrations: fixedHeader() and fixedHeader.adjust().
 *  - Auto-initialisation on the document-level "init.dt" event when
 *    `fixedHeader` is set in the table's init options or DataTable.defaults.
 *
 * Fixes applied to the upstream 3.0.0 build:
 *  1. The "destroy.dtfc" handler called `$(window).off(this.s.namespace)`.
 *     Inside the event callback `this` is not the FixedHeader instance, so
 *     `this.s` was undefined: destroy threw and the per-instance window
 *     scroll/resize handlers were never removed. It now uses the captured
 *     instance alias (`b`).
 *  2. The auto-init guard tested `settings._buttons`, a flag this extension
 *     never sets; the flag the constructor sets (and throws on) is
 *     `_fixedHeader`. The guard now tests `_fixedHeader`, so auto-init is
 *     neither suppressed by an unrelated flag nor attempted twice.
 */
(function(h,j){var g=function(e,i){var g=0,f=function(b,a){if(!(this instanceof f))throw"FixedHeader must be initialised with the 'new' keyword.";!0===a&&(a={});b=new i.Api(b);this.c=e.extend(!0,{},f.defaults,a);this.s={dt:b,position:{theadTop:0,tbodyTop:0,tfootTop:0,tfootBottom:0,width:0,left:0,tfootHeight:0,theadHeight:0,windowHeight:e(h).height(),visible:!0},headerMode:null,footerMode:null,namespace:".dtfc"+g++};this.dom={floatingHeader:null,thead:e(b.table().header()),tbody:e(b.table().body()),
tfoot:e(b.table().footer()),header:{host:null,floating:null,placeholder:null},footer:{host:null,floating:null,placeholder:null}};this.dom.header.host=this.dom.thead.parent();this.dom.footer.host=this.dom.tfoot.parent();var c=b.settings()[0];if(c._fixedHeader)throw"FixedHeader already initialised on table "+c.nTable.id;c._fixedHeader=this;this._constructor()};f.prototype={update:function(){this._positions();this._scroll(!0)},_constructor:function(){var b=this,a=this.s.dt;e(h).on("scroll"+this.s.namespace,
function(){b._scroll()}).on("resize"+this.s.namespace,function(){b.s.position.windowHeight=e(h).height();b._positions();b._scroll(!0)});a.on("column-reorder.dt.dtfc column-visibility.dt.dtfc",function(){b._positions();b._scroll(!0)}).on("draw.dtfc",function(){b._positions();b._scroll()});a.on("destroy.dtfc",function(){a.off(".dtfc");e(h).off(b.s.namespace)});this._positions();this._scroll()},_clone:function(b,a){var c=this.s.dt,d=this.dom[b],k="header"===b?this.dom.thead:this.dom.tfoot;!a&&d.floating?
d.floating.removeClass("fixedHeader-floating fixedHeader-locked"):(d.floating&&(d.placeholder.remove(),d.floating.children().detach(),d.floating.remove()),d.floating=e(c.table().node().cloneNode(!1)).removeAttr("id").append(k).appendTo("body"),d.placeholder=k.clone(!1),d.host.append(d.placeholder),"footer"===b&&this._footerMatch(d.placeholder,d.floating))},_footerMatch:function(b,a){var c=function(d){var c=e(d,b).map(function(){return e(this).width()}).toArray();e(d,a).each(function(a){e(this).width(c[a])})};
c("th");c("td")},_footerUnsize:function(){var b=this.dom.footer.floating;b&&e("th, td",b).css("width","")},_modeChange:function(b,a,c){var d=this.dom[a],e=this.s.position;"in-place"===b?(d.placeholder&&(d.placeholder.remove(),d.placeholder=null),d.host.append("header"===a?this.dom.thead:this.dom.tfoot),d.floating&&(d.floating.remove(),d.floating=null),"footer"===a&&this._footerUnsize()):"in"===b?(this._clone(a,c),d.floating.addClass("fixedHeader-floating").css("header"===a?"top":"bottom",this.c[a+
"Offset"]).css("left",e.left+"px").css("width",e.width+"px"),"footer"===a&&d.floating.css("top","")):"below"===b?(this._clone(a,c),d.floating.addClass("fixedHeader-locked").css("top",e.tfootTop-e.theadHeight).css("left",e.left+"px").css("width",e.width+"px")):"above"===b&&(this._clone(a,c),d.floating.addClass("fixedHeader-locked").css("top",e.tbodyTop).css("left",e.left+"px").css("width",e.width+"px"));this.s[a+"Mode"]=b},_positions:function(){var b=this.s.dt.table(),a=this.s.position,c=this.dom,
b=e(b.node()),d=b.children("thead"),f=b.children("tfoot"),c=c.tbody;a.visible=b.is(":visible");a.width=b.outerWidth();a.left=b.offset().left;a.theadTop=d.offset().top;a.tbodyTop=c.offset().top;a.theadHeight=a.tbodyTop-a.theadTop;f.length?(a.tfootTop=f.offset().top,a.tfootBottom=a.tfootTop+f.outerHeight(),a.tfootHeight=a.tfootBottom-a.tfootTop):(a.tfootTop=a.tbodyTop+c.outerHeight(),a.tfootBottom=a.tfootTop,a.tfootHeight=a.tfootTop)},_scroll:function(b){var a=e(j).scrollTop(),c=this.s.position,d;this.c.header&&
(d=!c.visible||a<=c.theadTop-this.c.headerOffset?"in-place":a<=c.tfootTop-c.theadHeight-this.c.headerOffset?"in":"below",(b||d!==this.s.headerMode)&&this._modeChange(d,"header",b));this.c.footer&&this.dom.tfoot.length&&(a=!c.visible||a+c.windowHeight>=c.tfootBottom+this.c.footerOffset?"in-place":c.windowHeight+a>c.tbodyTop+c.tfootHeight+this.c.footerOffset?"in":"above",(b||a!==this.s.footerMode)&&this._modeChange(a,"footer",b))}};f.version="3.0.0";f.defaults={header:!0,footer:!1,headerOffset:0,footerOffset:0};
e.fn.dataTable.FixedHeader=f;e.fn.DataTable.FixedHeader=f;e(j).on("init.dt.dtb",function(b,a){if("dt"===b.namespace){var c=a.oInit.fixedHeader||i.defaults.fixedHeader;c&&!a._fixedHeader&&new f(a,c)}});i.Api.register("fixedHeader()",function(){});i.Api.register("fixedHeader.adjust()",function(){return this.iterator("table",function(b){(b=b._fixedHeader)&&b.update()})});return f};"function"===typeof define&&define.amd?define(["jquery","datatables"],g):"object"===typeof exports?g(require("jquery"),require("datatables")):
jQuery&&!jQuery.fn.dataTable.FixedHeader&&g(jQuery,jQuery.fn.dataTable)})(window,document);


/*!
Responsive 1.0.7
2014-2015 SpryMedia Ltd - datatables.net/license
*/
/*
 * Responsive extension for DataTables (minified build).
 *
 * Structure: an IIFE receiving (window, document) that defines a factory
 * o($, DataTable) and invokes it through AMD `define`, CommonJS `require`, or
 * the jQuery global (the latter only when jQuery.fn.dataTable.Responsive is
 * not already defined).
 *
 * The factory provides:
 *  - Responsive(settings, opts): constructor. Throws a string unless
 *    DataTable.versionCheck("1.10.1") passes. Does nothing further when the
 *    table's settings already carry `responsive`. A string `opts.details` is
 *    normalised to `{type: opts.details}`; options are merged over
 *    Responsive.defaults and DataTable.defaults.responsive.
 *  - _constructor: binds a throttled window resize/orientationchange handler,
 *    sorts breakpoints by descending width, then runs _classLogic,
 *    _resizeAuto, the details setup (when details.type is set) and _resize.
 *  - _classLogic: derives per-column `includeIn` breakpoint lists from header
 *    class names ("all", "none", "never", "control", and
 *    [min-|max-|not-]<breakpoint>); columns matching none are flagged `auto`.
 *  - _columnsVisiblity (sic): computes the per-column visibility array for a
 *    breakpoint name, always leaving at least the first column visible.
 *  - Responsive.breakpoints: desktop (Infinity), tablet-l (1024),
 *    tablet-p (768), mobile-l (480), mobile-p (320).
 *  - Responsive.defaults: auto:true, details {type:"inline", target:0,
 *    renderer: builds a <ul> of hidden cells or returns false when empty}.
 *  - API registrations: responsive(), responsive.index(),
 *    responsive.rebuild(), responsive.recalc().
 *  - Auto-initialisation on the document-level "init.dt" event when the table
 *    has class "responsive" or "dt-responsive", or `responsive` is set in the
 *    init options or DataTable.defaults -- unless the option is exactly false.
 *
 * NOTE(review): the "destroy.dtr" handler removes "resize.dtr
 * orientationchange.dtr draw.dtr" from window using a namespace shared by all
 * instances, so destroying one table looks like it unbinds the resize
 * handlers of every other Responsive table on the page -- confirm.
 */
(function(n,p){var o=function(e,k){var h=function(d,a){if(!k.versionCheck||!k.versionCheck("1.10.1"))throw"DataTables Responsive requires DataTables 1.10.1 or newer";this.s={dt:new k.Api(d),columns:[]};this.s.dt.settings()[0].responsive||(a&&"string"===typeof a.details&&(a.details={type:a.details}),this.c=e.extend(!0,{},h.defaults,k.defaults.responsive,a),d.responsive=this,this._constructor())};h.prototype={_constructor:function(){var d=this,a=this.s.dt;a.settings()[0]._responsive=this;e(n).on("resize.dtr orientationchange.dtr",
a.settings()[0].oApi._fnThrottle(function(){d._resize()}));a.on("destroy.dtr",function(){e(n).off("resize.dtr orientationchange.dtr draw.dtr")});this.c.breakpoints.sort(function(a,c){return a.width<c.width?1:a.width>c.width?-1:0});this._classLogic();this._resizeAuto();var c=this.c.details;c.type&&(d._detailsInit(),this._detailsVis(),a.on("column-visibility.dtr",function(){d._detailsVis()}),a.on("draw.dtr",function(){a.rows({page:"current"}).iterator("row",function(b,c){var f=a.row(c);if(f.child.isShown()){var i=
d.c.details.renderer(a,c);f.child(i,"child").show()}})}),e(a.table().node()).addClass("dtr-"+c.type));this._resize()},_columnsVisiblity:function(d){var a=this.s.dt,c=this.s.columns,b,g,f=e.map(c,function(a){return a.auto&&null===a.minWidth?!1:!0===a.auto?"-":-1!==e.inArray(d,a.includeIn)}),i=0;b=0;for(g=f.length;b<g;b++)!0===f[b]&&(i+=c[b].minWidth);b=a.settings()[0].oScroll;b=b.sY||b.sX?b.iBarWidth:0;a=a.table().container().offsetWidth-b-i;b=0;for(g=f.length;b<g;b++)c[b].control&&(a-=c[b].minWidth);
i=!1;b=0;for(g=f.length;b<g;b++)"-"===f[b]&&!c[b].control&&(i||0>a-c[b].minWidth?(i=!0,f[b]=!1):f[b]=!0,a-=c[b].minWidth);a=!1;b=0;for(g=c.length;b<g;b++)if(!c[b].control&&!c[b].never&&!f[b]){a=!0;break}b=0;for(g=c.length;b<g;b++)c[b].control&&(f[b]=a);-1===e.inArray(!0,f)&&(f[0]=!0);return f},_classLogic:function(){var d=this,a=this.c.breakpoints,c=this.s.dt.columns().eq(0).map(function(a){a=this.column(a).header().className;return{className:a,includeIn:[],auto:!1,control:!1,never:a.match(/\bnever\b/)?
!0:!1}}),b=function(a,b){var d=c[a].includeIn;-1===e.inArray(b,d)&&d.push(b)},g=function(f,g,e,j){if(e)if("max-"===e){j=d._find(g).width;g=0;for(e=a.length;g<e;g++)a[g].width<=j&&b(f,a[g].name)}else if("min-"===e){j=d._find(g).width;g=0;for(e=a.length;g<e;g++)a[g].width>=j&&b(f,a[g].name)}else{if("not-"===e){g=0;for(e=a.length;g<e;g++)-1===a[g].name.indexOf(j)&&b(f,a[g].name)}}else c[f].includeIn.push(g)};c.each(function(b,c){for(var d=b.className.split(" "),j=!1,h=0,k=d.length;h<k;h++){var l=e.trim(d[h]);
if("all"===l){j=!0;b.includeIn=e.map(a,function(a){return a.name});return}if("none"===l||"never"===l){j=!0;return}if("control"===l){j=!0;b.control=!0;return}e.each(a,function(a,b){var d=b.name.split("-"),e=l.match(RegExp("(min\\-|max\\-|not\\-)?("+d[0]+")(\\-[_a-zA-Z0-9])?"));e&&(j=!0,e[2]===d[0]&&e[3]==="-"+d[1]?g(c,b.name,e[1],e[2]+e[3]):e[2]===d[0]&&!e[3]&&g(c,b.name,e[1],e[2]))})}j||(b.auto=!0)});this.s.columns=c},_detailsInit:function(){var d=this,a=this.s.dt,c=this.c.details;"inline"===c.type&&
(c.target="td:first-child");var b=c.target;e(a.table().body()).on("click","string"===typeof b?b:"td",function(){if(e(a.table().node()).hasClass("collapsed")&&a.row(e(this).closest("tr")).length){if(typeof b==="number"){var c=b<0?a.columns().eq(0).length+b:b;if(a.cell(this).index().column!==c)return}c=a.row(e(this).closest("tr"));if(c.child.isShown()){c.child(false);e(c.node()).removeClass("parent")}else{var f=d.c.details.renderer(a,c[0]);c.child(f,"child").show();e(c.node()).addClass("parent")}}})},
_detailsVis:function(){var d=this,a=this.s.dt,c=a.columns().indexes().filter(function(b){var c=a.column(b);return c.visible()?null:e(c.header()).hasClass("never")?null:b}),b=!0;if(0===c.length||1===c.length&&this.s.columns[c[0]].control)b=!1;b?a.rows({page:"current"}).eq(0).each(function(b){b=a.row(b);if(b.child()){var c=d.c.details.renderer(a,b[0]);!1===c?b.child.hide():b.child(c,"child").show()}}):a.rows({page:"current"}).eq(0).each(function(b){a.row(b).child.hide()})},_find:function(d){for(var a=
this.c.breakpoints,c=0,b=a.length;c<b;c++)if(a[c].name===d)return a[c]},_resize:function(){var d=this.s.dt,a=e(n).width(),c=this.c.breakpoints,b=c[0].name,g=this.s.columns,f;for(f=c.length-1;0<=f;f--)if(a<=c[f].width){b=c[f].name;break}var i=this._columnsVisiblity(b),c=!1;f=0;for(a=g.length;f<a;f++)if(!1===i[f]&&!g[f].never){c=!0;break}e(d.table().node()).toggleClass("collapsed",c);d.columns().eq(0).each(function(a,b){d.column(a).visible(i[b])})},_resizeAuto:function(){var d=this.s.dt,a=this.s.columns;
if(this.c.auto&&-1!==e.inArray(!0,e.map(a,function(a){return a.auto}))){d.table().node();var c=d.table().node().cloneNode(!1),b=e(d.table().header().cloneNode(!1)).appendTo(c),g=e(d.table().body().cloneNode(!1)).appendTo(c);e(d.table().footer()).clone(!1).appendTo(c);d.rows({page:"current"}).indexes().flatten().each(function(a){var b=d.row(a).node().cloneNode(!0);d.columns(":hidden").flatten().length&&e(b).append(d.cells(a,":hidden").nodes().to$().clone());e(b).appendTo(g)});var f=d.columns().header().to$().clone(!1);
e("<tr/>").append(f).appendTo(b);"inline"===this.c.details.type&&e(c).addClass("dtr-inline collapsed");c=e("<div/>").css({width:1,height:1,overflow:"hidden"}).append(c);c.find("th.never, td.never").remove();c.insertBefore(d.table().node());d.columns().eq(0).each(function(b){a[b].minWidth=f[b].offsetWidth||0});c.remove()}}};h.breakpoints=[{name:"desktop",width:Infinity},{name:"tablet-l",width:1024},{name:"tablet-p",width:768},{name:"mobile-l",width:480},{name:"mobile-p",width:320}];h.defaults={breakpoints:h.breakpoints,
auto:!0,details:{renderer:function(d,a){var c=d.cells(a,":hidden").eq(0).map(function(a){var c=e(d.column(a.column).header()),a=d.cell(a).index();if(c.hasClass("control")||c.hasClass("never"))return"";var f=d.settings()[0],f=f.oApi._fnGetCellData(f,a.row,a.column,"display");(c=c.text())&&(c+=":");return'<li data-dtr-index="'+a.column+'"><span class="dtr-title">'+c+'</span> <span class="dtr-data">'+f+"</span></li>"}).toArray().join("");return c?e('<ul data-dtr-index="'+a+'"/>').append(c):!1},target:0,
type:"inline"}};var m=e.fn.dataTable.Api;m.register("responsive()",function(){return this});m.register("responsive.index()",function(d){d=e(d);return{column:d.data("dtr-index"),row:d.parent().data("dtr-index")}});m.register("responsive.rebuild()",function(){return this.iterator("table",function(d){d._responsive&&d._responsive._classLogic()})});m.register("responsive.recalc()",function(){return this.iterator("table",function(d){d._responsive&&(d._responsive._resizeAuto(),d._responsive._resize())})});
h.version="1.0.7";e.fn.dataTable.Responsive=h;e.fn.DataTable.Responsive=h;e(p).on("init.dt.dtr",function(d,a){if("dt"===d.namespace&&(e(a.nTable).hasClass("responsive")||e(a.nTable).hasClass("dt-responsive")||a.oInit.responsive||k.defaults.responsive)){var c=a.oInit.responsive;!1!==c&&new h(a,e.isPlainObject(c)?c:{})}});return h};"function"===typeof define&&define.amd?define(["jquery","datatables"],o):"object"===typeof exports?o(require("jquery"),require("datatables")):jQuery&&!jQuery.fn.dataTable.Responsive&&
o(jQuery,jQuery.fn.dataTable)})(window,document);


/*!
Scroller 1.3.0
©2011-2015 SpryMedia Ltd - datatables.net/license
*/
/*
 * Scroller extension for DataTables (minified build).
 *
 * Structure: an IIFE receiving (window, document, undefined) that defines a
 * factory j($, DataTable) and invokes it through AMD `define`, CommonJS
 * `require`, or the jQuery global (the latter only when
 * jQuery.fn.dataTable.Scroller is not already defined).
 *
 * The factory provides:
 *  - Scroller(dt, opts): constructor. Shows an alert() instead of throwing
 *    when called without `new`. State lives in `this.s` (built from internal
 *    fields, then Scroller.oDefaults, then opts); construction proceeds only
 *    when the table settings do not already carry `oScroller`.
 *  - _fnConstruct: requires pagination (oFeatures.bPaginate); otherwise it
 *    logs "Pagination must be enabled for Scroller". It inserts a 1px-wide
 *    "force" div into the scroll body, positions the table absolutely, and
 *    binds scroll/touchstart/resize handlers plus draw, state-save and
 *    destroy callbacks.
 *  - fnRowToPixels / fnPixelsToRow: convert between row index and scroll
 *    offset, optionally through _domain's virtual<->physical mapping.
 *  - fnScrollToRow(row, animate): scrolls (animated unless animate is falsy
 *    and defined) to the given row.
 *  - fnMeasure(redraw): recomputes row/viewport heights and the display
 *    length; redraws unless `redraw` is falsy and defined.
 *  - _fnScrollForce: sets the force div height to rowHeight * record count,
 *    capped at 1E6 pixels.
 *  - Scroller.defaults (aliased as oDefaults): trace:false, rowHeight:"auto",
 *    serverWait:200, displayBuffer:9, boundaryScale:0.5,
 *    loadingIndicator:false.
 *  - Feature registration under cFeature "S" when DataTables passes
 *    fnVersionCheck("1.10.0"); otherwise an alert() warning is shown.
 *  - API registrations: scroller(), scroller().rowToPixels(),
 *    scroller().pixelsToRow(), scroller().scrollToRow(), row().scrollTo(),
 *    scroller.measure().
 *  - Auto-initialisation on the document-level "preInit.dt" event when
 *    `scroller` is set in the init options or DataTable.defaults, unless the
 *    init option is exactly false.
 *
 * The internal flag is spelled `ingnoreScroll` (sic) consistently throughout;
 * do not "correct" a single occurrence.
 *
 * NOTE(review): the preInit handler merges with $.extend({}, init, defaults),
 * so keys in DataTable.defaults.scroller override the per-table init options
 * rather than the reverse -- confirm whether that order is intended.
 */
(function(m,n,k){var j=function(e,j){var g=function(a,b){this instanceof g?(b===k&&(b={}),this.s={dt:e.fn.dataTable.Api(a).settings()[0],tableTop:0,tableBottom:0,redrawTop:0,redrawBottom:0,autoHeight:!0,viewportRows:0,stateTO:null,drawTO:null,heights:{jump:null,page:null,virtual:null,scroll:null,row:null,viewport:null},topRowFloat:0,scrollDrawDiff:null,loaderVisible:!1},this.s=e.extend(this.s,g.oDefaults,b),this.s.heights.row=this.s.rowHeight,this.dom={force:n.createElement("div"),scroller:null,table:null,
loader:null},this.s.dt.oScroller||(this.s.dt.oScroller=this,this._fnConstruct())):alert("Scroller warning: Scroller must be initialised with the 'new' keyword.")};g.prototype={fnRowToPixels:function(a,b,c){a=c?this._domain("virtualToPhysical",a*this.s.heights.row):this.s.baseScrollTop+(a-this.s.baseRowTop)*this.s.heights.row;return b||b===k?parseInt(a,10):a},fnPixelsToRow:function(a,b,c){var d=a-this.s.baseScrollTop,a=c?this._domain("physicalToVirtual",a)/this.s.heights.row:d/this.s.heights.row+this.s.baseRowTop;
return b||b===k?parseInt(a,10):a},fnScrollToRow:function(a,b){var c=this,d=!1,f=this.fnRowToPixels(a),i=a-(this.s.displayBuffer-1)/2*this.s.viewportRows;0>i&&(i=0);if((f>this.s.redrawBottom||f<this.s.redrawTop)&&this.s.dt._iDisplayStart!==i)d=!0,f=this.fnRowToPixels(a,!1,!0);"undefined"==typeof b||b?(this.s.ani=d,e(this.dom.scroller).animate({scrollTop:f},function(){setTimeout(function(){c.s.ani=!1},25)})):e(this.dom.scroller).scrollTop(f)},fnMeasure:function(a){this.s.autoHeight&&this._fnCalcRowHeight();
var b=this.s.heights;b.viewport=e(this.dom.scroller).height();this.s.viewportRows=parseInt(b.viewport/b.row,10)+1;this.s.dt._iDisplayLength=this.s.viewportRows*this.s.displayBuffer;(a===k||a)&&this.s.dt.oInstance.fnDraw()},_fnConstruct:function(){var a=this;if(this.s.dt.oFeatures.bPaginate){this.dom.force.style.position="relative";this.dom.force.style.top="0px";this.dom.force.style.left="0px";this.dom.force.style.width="1px";this.dom.scroller=e("div."+this.s.dt.oClasses.sScrollBody,this.s.dt.nTableWrapper)[0];
this.dom.scroller.appendChild(this.dom.force);this.dom.scroller.style.position="relative";this.dom.table=e(">table",this.dom.scroller)[0];this.dom.table.style.position="absolute";this.dom.table.style.top="0px";this.dom.table.style.left="0px";e(this.s.dt.nTableWrapper).addClass("DTS");this.s.loadingIndicator&&(this.dom.loader=e('<div class="dataTables_processing DTS_Loading">'+this.s.dt.oLanguage.sLoadingRecords+"</div>").css("display","none"),e(this.dom.scroller.parentNode).css("position","relative").append(this.dom.loader));
this.s.heights.row&&"auto"!=this.s.heights.row&&(this.s.autoHeight=!1);this.fnMeasure(!1);this.s.ingnoreScroll=!0;this.s.stateSaveThrottle=this.s.dt.oApi._fnThrottle(function(){a.s.dt.oApi._fnSaveState(a.s.dt)},500);e(this.dom.scroller).on("scroll.DTS",function(){a._fnScroll.call(a)});e(this.dom.scroller).on("touchstart.DTS",function(){a._fnScroll.call(a)});this.s.dt.aoDrawCallback.push({fn:function(){a.s.dt.bInitialised&&a._fnDrawCallback.call(a)},sName:"Scroller"});e(m).on("resize.DTS",function(){a.fnMeasure(false);
a._fnInfo()});var b=!0;this.s.dt.oApi._fnCallbackReg(this.s.dt,"aoStateSaveParams",function(c,d){if(b&&a.s.dt.oLoadedState){d.iScroller=a.s.dt.oLoadedState.iScroller;d.iScrollerTopRow=a.s.dt.oLoadedState.iScrollerTopRow;b=false}else{d.iScroller=a.dom.scroller.scrollTop;d.iScrollerTopRow=a.s.topRowFloat}},"Scroller_State");this.s.dt.oLoadedState&&(this.s.topRowFloat=this.s.dt.oLoadedState.iScrollerTopRow||0);e(this.s.dt.nTable).on("init.dt",function(){a.fnMeasure()});this.s.dt.aoDestroyCallback.push({sName:"Scroller",
fn:function(){e(m).off("resize.DTS");e(a.dom.scroller).off("touchstart.DTS scroll.DTS");e(a.s.dt.nTableWrapper).removeClass("DTS");e("div.DTS_Loading",a.dom.scroller.parentNode).remove();e(a.s.dt.nTable).off("init.dt");a.dom.table.style.position="";a.dom.table.style.top="";a.dom.table.style.left=""}})}else this.s.dt.oApi._fnLog(this.s.dt,0,"Pagination must be enabled for Scroller")},_fnScroll:function(){var a=this,b=this.s.heights,c=this.dom.scroller.scrollTop,d;if(!this.s.skip&&!this.s.ingnoreScroll)if(this.s.dt.bFiltered||
this.s.dt.bSorted)this.s.lastScrollTop=0;else{this._fnInfo();clearTimeout(this.s.stateTO);this.s.stateTO=setTimeout(function(){a.s.dt.oApi._fnSaveState(a.s.dt)},250);if(c<this.s.redrawTop||c>this.s.redrawBottom){var f=Math.ceil((this.s.displayBuffer-1)/2*this.s.viewportRows);Math.abs(c-this.s.lastScrollTop)>b.viewport||this.s.ani?(d=parseInt(this._domain("physicalToVirtual",c)/b.row,10)-f,this.s.topRowFloat=this._domain("physicalToVirtual",c)/b.row):(d=this.fnPixelsToRow(c)-f,this.s.topRowFloat=this.fnPixelsToRow(c,
!1));0>=d?d=0:d+this.s.dt._iDisplayLength>this.s.dt.fnRecordsDisplay()?(d=this.s.dt.fnRecordsDisplay()-this.s.dt._iDisplayLength,0>d&&(d=0)):0!==d%2&&d++;if(d!=this.s.dt._iDisplayStart&&(this.s.tableTop=e(this.s.dt.nTable).offset().top,this.s.tableBottom=e(this.s.dt.nTable).height()+this.s.tableTop,b=function(){if(a.s.scrollDrawReq===null)a.s.scrollDrawReq=c;a.s.dt._iDisplayStart=d;a.s.dt.oApi._fnDraw(a.s.dt)},this.s.dt.oFeatures.bServerSide?(clearTimeout(this.s.drawTO),this.s.drawTO=setTimeout(b,
this.s.serverWait)):b(),this.dom.loader&&!this.s.loaderVisible))this.dom.loader.css("display","block"),this.s.loaderVisible=!0}this.s.lastScrollTop=c;this.s.stateSaveThrottle()}},_domain:function(a,b){var c=this.s.heights,d;if(c.virtual===c.scroll){d=(c.virtual-c.viewport)/(c.scroll-c.viewport);if("virtualToPhysical"===a)return b/d;if("physicalToVirtual"===a)return b*d}var e=(c.scroll-c.viewport)/2,i=(c.virtual-c.viewport)/2;d=i/(e*e);if("virtualToPhysical"===a){if(b<i)return Math.pow(b/d,0.5);b=
2*i-b;return 0>b?c.scroll:2*e-Math.pow(b/d,0.5)}if("physicalToVirtual"===a){if(b<e)return b*b*d;b=2*e-b;return 0>b?c.virtual:2*i-b*b*d}},_fnDrawCallback:function(){var a=this,b=this.s.heights,c=this.dom.scroller.scrollTop,d=e(this.s.dt.nTable).height(),f=this.s.dt._iDisplayStart,i=this.s.dt._iDisplayLength,g=this.s.dt.fnRecordsDisplay();this.s.skip=!0;this._fnScrollForce();c=0===f?this.s.topRowFloat*b.row:f+i>=g?b.scroll-(g-this.s.topRowFloat)*b.row:this._domain("virtualToPhysical",this.s.topRowFloat*
b.row);this.dom.scroller.scrollTop=c;this.s.baseScrollTop=c;this.s.baseRowTop=this.s.topRowFloat;var l=c-(this.s.topRowFloat-f)*b.row;0===f?l=0:f+i>=g&&(l=b.scroll-d);this.dom.table.style.top=l+"px";this.s.tableTop=l;this.s.tableBottom=d+this.s.tableTop;d=(c-this.s.tableTop)*this.s.boundaryScale;this.s.redrawTop=c-d;this.s.redrawBottom=c+d;this.s.skip=!1;this.s.dt.oFeatures.bStateSave&&null!==this.s.dt.oLoadedState&&"undefined"!=typeof this.s.dt.oLoadedState.iScroller?((c=(this.s.dt.sAjaxSource||
a.s.dt.ajax)&&!this.s.dt.oFeatures.bServerSide?!0:!1)&&2==this.s.dt.iDraw||!c&&1==this.s.dt.iDraw)&&setTimeout(function(){e(a.dom.scroller).scrollTop(a.s.dt.oLoadedState.iScroller);a.s.redrawTop=a.s.dt.oLoadedState.iScroller-b.viewport/2;setTimeout(function(){a.s.ingnoreScroll=!1},0)},0):a.s.ingnoreScroll=!1;setTimeout(function(){a._fnInfo.call(a)},0);this.dom.loader&&this.s.loaderVisible&&(this.dom.loader.css("display","none"),this.s.loaderVisible=!1)},_fnScrollForce:function(){var a=this.s.heights;
a.virtual=a.row*this.s.dt.fnRecordsDisplay();a.scroll=a.virtual;1E6<a.scroll&&(a.scroll=1E6);this.dom.force.style.height=a.scroll>this.s.heights.row?a.scroll+"px":this.s.heights.row+"px"},_fnCalcRowHeight:function(){var a=this.s.dt,b=a.nTable,c=b.cloneNode(!1),d=e("<tbody/>").appendTo(c),f=e('<div class="'+a.oClasses.sWrapper+' DTS"><div class="'+a.oClasses.sScrollWrapper+'"><div class="'+a.oClasses.sScrollBody+'"></div></div></div>');for(e("tbody tr:lt(4)",b).clone().appendTo(d);3>e("tr",d).length;)d.append("<tr><td>&nbsp;</td></tr>");
e("div."+a.oClasses.sScrollBody,f).append(c);f.appendTo(this.s.dt.nHolding||b.parentNode);this.s.heights.row=e("tr",d).eq(1).outerHeight();f.remove()},_fnInfo:function(){if(this.s.dt.oFeatures.bInfo){var a=this.s.dt,b=a.oLanguage,c=this.dom.scroller.scrollTop,d=Math.floor(this.fnPixelsToRow(c,!1,this.s.ani)+1),f=a.fnRecordsTotal(),g=a.fnRecordsDisplay(),c=Math.ceil(this.fnPixelsToRow(c+this.s.heights.viewport,!1,this.s.ani)),c=g<c?g:c,h=a.fnFormatNumber(d),l=a.fnFormatNumber(c),j=a.fnFormatNumber(f),
k=a.fnFormatNumber(g),h=0===a.fnRecordsDisplay()&&a.fnRecordsDisplay()==a.fnRecordsTotal()?b.sInfoEmpty+b.sInfoPostFix:0===a.fnRecordsDisplay()?b.sInfoEmpty+" "+b.sInfoFiltered.replace("_MAX_",j)+b.sInfoPostFix:a.fnRecordsDisplay()==a.fnRecordsTotal()?b.sInfo.replace("_START_",h).replace("_END_",l).replace("_MAX_",j).replace("_TOTAL_",k)+b.sInfoPostFix:b.sInfo.replace("_START_",h).replace("_END_",l).replace("_MAX_",j).replace("_TOTAL_",k)+" "+b.sInfoFiltered.replace("_MAX_",a.fnFormatNumber(a.fnRecordsTotal()))+
b.sInfoPostFix;(b=b.fnInfoCallback)&&(h=b.call(a.oInstance,a,d,c,f,g,h));a=a.aanFeatures.i;if("undefined"!=typeof a){d=0;for(f=a.length;d<f;d++)e(a[d]).html(h)}}}};g.defaults={trace:!1,rowHeight:"auto",serverWait:200,displayBuffer:9,boundaryScale:0.5,loadingIndicator:!1};g.oDefaults=g.defaults;g.version="1.3.0";"function"==typeof e.fn.dataTable&&"function"==typeof e.fn.dataTableExt.fnVersionCheck&&e.fn.dataTableExt.fnVersionCheck("1.10.0")?e.fn.dataTableExt.aoFeatures.push({fnInit:function(a){var b=
a.oInit;new g(a,b.scroller||b.oScroller||{})},cFeature:"S",sFeature:"Scroller"}):alert("Warning: Scroller requires DataTables 1.10.0 or greater - www.datatables.net/download");e(n).on("preInit.dt.dtscroller",function(a,b){if("dt"===a.namespace){var c=b.oInit.scroller,d=j.defaults.scroller;if(c||d)d=e.extend({},c,d),!1!==c&&new g(b,d)}});e.fn.dataTable.Scroller=g;e.fn.DataTable.Scroller=g;var h=e.fn.dataTable.Api;h.register("scroller()",function(){return this});h.register("scroller().rowToPixels()",
function(a,b,c){var d=this.context;if(d.length&&d[0].oScroller)return d[0].oScroller.fnRowToPixels(a,b,c)});h.register("scroller().pixelsToRow()",function(a,b,c){var d=this.context;if(d.length&&d[0].oScroller)return d[0].oScroller.fnPixelsToRow(a,b,c)});h.register("scroller().scrollToRow()",function(a,b){this.iterator("table",function(c){c.oScroller&&c.oScroller.fnScrollToRow(a,b)});return this});h.register("row().scrollTo()",function(a){var b=this;this.iterator("row",function(c,d){if(c.oScroller){var e=
b.rows({order:"applied",search:"applied"}).indexes().indexOf(d);c.oScroller.fnScrollToRow(e,a)}});return this});h.register("scroller.measure()",function(a){this.iterator("table",function(b){b.oScroller&&b.oScroller.fnMeasure(a)});return this});return g};"function"===typeof define&&define.amd?define(["jquery","datatables"],j):"object"===typeof exports?j(require("jquery"),require("datatables")):jQuery&&!jQuery.fn.dataTable.Scroller&&j(jQuery,jQuery.fn.dataTable)})(window,document);



+ 1121
- 0
interface/js/rspamd.js
File diff suppressed because it is too large
View File


+ 2
- 0
interface/plugins.txt View File

@@ -0,0 +1,2 @@
https://github.com/valums/file-uploader/
https://github.com/flot/flot

+ 18
- 0
interface/react-index.html View File

@@ -0,0 +1,18 @@
<html lang="en">
<head>
<meta charset="utf-8">
<title>RSPAMD Admin</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="">
<link href="./css/fineuploader.min.css" rel="stylesheet">
<link href="./css/bootstrap.min.css" rel="stylesheet">
<link href="./css/rspamd.css" rel="stylesheet">
</head>

<body>
<script src="./js/jquery-2.1.4.min.js"></script>
<script src="./js/bootstrap.min.js"></script>
<script src="bundle.js"></script>
</body>
</html>

Loading…
Cancel
Save