about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/snowball/java/org
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-31 17:38:02 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-31 17:38:02 +0000
commit: 2375dba898b481837879940dfdcf3ea85248fe01 (patch)
tree: cced5fb680e9a362d1de25630bff537865d38365 /contrib/snowball/java/org
parent: 1543c98d38ffb84a1e405081436d0a25bee713a6 (diff)
download: rspamd-2375dba898b481837879940dfdcf3ea85248fe01.tar.gz
rspamd-2375dba898b481837879940dfdcf3ea85248fe01.zip
Remove bloody submodules.
Diffstat (limited to 'contrib/snowball/java/org')
-rw-r--r-- contrib/snowball/java/org/tartarus/snowball/Among.java | 31
-rw-r--r-- contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java | 432
-rw-r--r-- contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java | 7
-rw-r--r-- contrib/snowball/java/org/tartarus/snowball/TestApp.java | 77
4 files changed, 547 insertions, 0 deletions
diff --git a/contrib/snowball/java/org/tartarus/snowball/Among.java b/contrib/snowball/java/org/tartarus/snowball/Among.java
new file mode 100644
index 000000000..5ed37b503
--- /dev/null
+++ b/contrib/snowball/java/org/tartarus/snowball/Among.java
@@ -0,0 +1,31 @@
+package org.tartarus.snowball;
+
+import java.lang.reflect.Method;
+
+/**
+ * One entry of an "among" table: a search string together with the values
+ * and the optional method that {@code SnowballProgram.find_among} and
+ * {@code find_among_b} consult when the string matches.
+ */
+public class Among {
+    /**
+     * Builds a table entry.
+     *
+     * @param s            search string; stored as a char array in {@link #s}
+     * @param substring_i  value stored in {@link #substring_i}
+     * @param result       value stored in {@link #result}
+     * @param methodname   name of a no-argument method declared by the class of
+     *                     {@code methodobject}; {@code null} or the empty string
+     *                     means the entry has no method
+     * @param methodobject object whose class is searched for the method and
+     *                     which is stored in {@link #methodobject}
+     * @throws IllegalArgumentException if {@code methodname} is non-empty and
+     *         the class of {@code methodobject} declares no such no-argument
+     *         method (the {@link NoSuchMethodException} is kept as the cause)
+     */
+    public Among (String s, int substring_i, int result,
+                  String methodname, SnowballProgram methodobject) {
+        this.s_size = s.length();
+        this.s = s.toCharArray();
+        this.substring_i = substring_i;
+        this.result = result;
+        this.methodobject = methodobject;
+        // A null name is treated like the empty name instead of failing with
+        // a NullPointerException.
+        if (methodname == null || methodname.length() == 0) {
+            this.method = null;
+        } else {
+            try {
+                this.method = methodobject.getClass().getDeclaredMethod(methodname);
+            } catch (NoSuchMethodException e) {
+                // IllegalArgumentException is a RuntimeException, so callers
+                // that caught the previous bare RuntimeException still do.
+                throw new IllegalArgumentException(
+                    "no method '" + methodname + "' declared by "
+                    + methodobject.getClass().getName(), e);
+            }
+        }
+    }
+
+    public final int s_size; /* length of the search string */
+    public final char[] s; /* search string */
+    public final int substring_i; /* index to longest matching substring */
+    public final int result; /* result of the lookup */
+    public final Method method; /* method to use if substring matches; null if none */
+    public final SnowballProgram methodobject; /* object to invoke method on */
+}
diff --git a/contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java b/contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java
new file mode 100644
index 000000000..52d6baa78
--- /dev/null
+++ b/contrib/snowball/java/org/tartarus/snowball/SnowballProgram.java
@@ -0,0 +1,432 @@
+
+package org.tartarus.snowball;
+import java.lang.reflect.InvocationTargetException;
+
+public class SnowballProgram {
+ protected SnowballProgram()
+ {
+ current = new StringBuffer();
+ setCurrent("");
+ }
+
+ /**
+ * Set the current string.
+ */
+ public void setCurrent(String value)
+ {
+ current.replace(0, current.length(), value);
+ cursor = 0;
+ limit = current.length();
+ limit_backward = 0;
+ bra = cursor;
+ ket = limit;
+ }
+
+ /**
+ * Get the current string.
+ */
+ public String getCurrent()
+ {
+ String result = current.toString();
+ // Make a new StringBuffer. If we reuse the old one, and a user of
+ // the library keeps a reference to the buffer returned (for example,
+ // by converting it to a String in a way which doesn't force a copy),
+ // the buffer size will not decrease, and we will risk wasting a large
+ // amount of memory.
+ // Thanks to Wolfram Esser for spotting this problem.
+ current = new StringBuffer();
+ return result;
+ }
+
+ // current string
+ protected StringBuffer current;
+
+ protected int cursor;
+ protected int limit;
+ protected int limit_backward;
+ protected int bra;
+ protected int ket;
+
+ protected void copy_from(SnowballProgram other)
+ {
+ current = other.current;
+ cursor = other.cursor;
+ limit = other.limit;
+ limit_backward = other.limit_backward;
+ bra = other.bra;
+ ket = other.ket;
+ }
+
+ protected boolean in_grouping(char [] s, int min, int max)
+ {
+ if (cursor >= limit) return false;
+ char ch = current.charAt(cursor);
+ if (ch > max || ch < min) return false;
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
+ cursor++;
+ return true;
+ }
+
+ protected boolean in_grouping_b(char [] s, int min, int max)
+ {
+ if (cursor <= limit_backward) return false;
+ char ch = current.charAt(cursor - 1);
+ if (ch > max || ch < min) return false;
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
+ cursor--;
+ return true;
+ }
+
+ protected boolean out_grouping(char [] s, int min, int max)
+ {
+ if (cursor >= limit) return false;
+ char ch = current.charAt(cursor);
+ if (ch > max || ch < min) {
+ cursor++;
+ return true;
+ }
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
+ cursor ++;
+ return true;
+ }
+ return false;
+ }
+
+ protected boolean out_grouping_b(char [] s, int min, int max)
+ {
+ if (cursor <= limit_backward) return false;
+ char ch = current.charAt(cursor - 1);
+ if (ch > max || ch < min) {
+ cursor--;
+ return true;
+ }
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
+ cursor--;
+ return true;
+ }
+ return false;
+ }
+
+ protected boolean in_range(int min, int max)
+ {
+ if (cursor >= limit) return false;
+ char ch = current.charAt(cursor);
+ if (ch > max || ch < min) return false;
+ cursor++;
+ return true;
+ }
+
+ protected boolean in_range_b(int min, int max)
+ {
+ if (cursor <= limit_backward) return false;
+ char ch = current.charAt(cursor - 1);
+ if (ch > max || ch < min) return false;
+ cursor--;
+ return true;
+ }
+
+ protected boolean out_range(int min, int max)
+ {
+ if (cursor >= limit) return false;
+ char ch = current.charAt(cursor);
+ if (!(ch > max || ch < min)) return false;
+ cursor++;
+ return true;
+ }
+
+ protected boolean out_range_b(int min, int max)
+ {
+ if (cursor <= limit_backward) return false;
+ char ch = current.charAt(cursor - 1);
+ if(!(ch > max || ch < min)) return false;
+ cursor--;
+ return true;
+ }
+
+ protected boolean eq_s(int s_size, String s)
+ {
+ if (limit - cursor < s_size) return false;
+ int i;
+ for (i = 0; i != s_size; i++) {
+ if (current.charAt(cursor + i) != s.charAt(i)) return false;
+ }
+ cursor += s_size;
+ return true;
+ }
+
+ protected boolean eq_s_b(int s_size, String s)
+ {
+ if (cursor - limit_backward < s_size) return false;
+ int i;
+ for (i = 0; i != s_size; i++) {
+ if (current.charAt(cursor - s_size + i) != s.charAt(i)) return false;
+ }
+ cursor -= s_size;
+ return true;
+ }
+
+ protected boolean eq_v(CharSequence s)
+ {
+ return eq_s(s.length(), s.toString());
+ }
+
+ protected boolean eq_v_b(CharSequence s)
+ { return eq_s_b(s.length(), s.toString());
+ }
+
+ protected int find_among(Among v[], int v_size)
+ {
+ int i = 0;
+ int j = v_size;
+
+ int c = cursor;
+ int l = limit;
+
+ int common_i = 0;
+ int common_j = 0;
+
+ boolean first_key_inspected = false;
+
+ while(true) {
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j; // smaller
+ Among w = v[k];
+ int i2;
+ for (i2 = common; i2 < w.s_size; i2++) {
+ if (c + common == l) {
+ diff = -1;
+ break;
+ }
+ diff = current.charAt(c + common) - w.s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ if (diff < 0) {
+ j = k;
+ common_j = common;
+ } else {
+ i = k;
+ common_i = common;
+ }
+ if (j - i <= 1) {
+ if (i > 0) break; // v->s has been inspected
+ if (j == i) break; // only one item in v
+
+ // - but now we need to go round once more to get
+ // v->s inspected. This looks messy, but is actually
+ // the optimal approach.
+
+ if (first_key_inspected) break;
+ first_key_inspected = true;
+ }
+ }
+ while(true) {
+ Among w = v[i];
+ if (common_i >= w.s_size) {
+ cursor = c + w.s_size;
+ if (w.method == null) return w.result;
+ boolean res;
+ try {
+ Object resobj = w.method.invoke(w.methodobject,
+ new Object[0]);
+ res = resobj.toString().equals("true");
+ } catch (InvocationTargetException e) {
+ res = false;
+ // FIXME - debug message
+ } catch (IllegalAccessException e) {
+ res = false;
+ // FIXME - debug message
+ }
+ cursor = c + w.s_size;
+ if (res) return w.result;
+ }
+ i = w.substring_i;
+ if (i < 0) return 0;
+ }
+ }
+
+ // find_among_b is for backwards processing. Same comments apply
+ protected int find_among_b(Among v[], int v_size)
+ {
+ int i = 0;
+ int j = v_size;
+
+ int c = cursor;
+ int lb = limit_backward;
+
+ int common_i = 0;
+ int common_j = 0;
+
+ boolean first_key_inspected = false;
+
+ while(true) {
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j;
+ Among w = v[k];
+ int i2;
+ for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
+ if (c - common == lb) {
+ diff = -1;
+ break;
+ }
+ diff = current.charAt(c - 1 - common) - w.s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ if (diff < 0) {
+ j = k;
+ common_j = common;
+ } else {
+ i = k;
+ common_i = common;
+ }
+ if (j - i <= 1) {
+ if (i > 0) break;
+ if (j == i) break;
+ if (first_key_inspected) break;
+ first_key_inspected = true;
+ }
+ }
+ while(true) {
+ Among w = v[i];
+ if (common_i >= w.s_size) {
+ cursor = c - w.s_size;
+ if (w.method == null) return w.result;
+
+ boolean res;
+ try {
+ Object resobj = w.method.invoke(w.methodobject,
+ new Object[0]);
+ res = resobj.toString().equals("true");
+ } catch (InvocationTargetException e) {
+ res = false;
+ // FIXME - debug message
+ } catch (IllegalAccessException e) {
+ res = false;
+ // FIXME - debug message
+ }
+ cursor = c - w.s_size;
+ if (res) return w.result;
+ }
+ i = w.substring_i;
+ if (i < 0) return 0;
+ }
+ }
+
+ /* to replace chars between c_bra and c_ket in current by the
+ * chars in s.
+ */
+ protected int replace_s(int c_bra, int c_ket, String s)
+ {
+ int adjustment = s.length() - (c_ket - c_bra);
+ current.replace(c_bra, c_ket, s);
+ limit += adjustment;
+ if (cursor >= c_ket) cursor += adjustment;
+ else if (cursor > c_bra) cursor = c_bra;
+ return adjustment;
+ }
+
+ protected void slice_check()
+ {
+ if (bra < 0 ||
+ bra > ket ||
+ ket > limit ||
+ limit > current.length()) // this line could be removed
+ {
+ System.err.println("faulty slice operation");
+ // FIXME: report error somehow.
+ /*
+ fprintf(stderr, "faulty slice operation:\n");
+ debug(z, -1, 0);
+ exit(1);
+ */
+ }
+ }
+
+ protected void slice_from(String s)
+ {
+ slice_check();
+ replace_s(bra, ket, s);
+ }
+
+ protected void slice_from(CharSequence s)
+ {
+ slice_from(s.toString());
+ }
+
+ protected void slice_del()
+ {
+ slice_from("");
+ }
+
+ protected void insert(int c_bra, int c_ket, String s)
+ {
+ int adjustment = replace_s(c_bra, c_ket, s);
+ if (c_bra <= bra) bra += adjustment;
+ if (c_bra <= ket) ket += adjustment;
+ }
+
+ protected void insert(int c_bra, int c_ket, CharSequence s)
+ {
+ insert(c_bra, c_ket, s.toString());
+ }
+
+ /* Copy the slice into the supplied StringBuffer */
+ protected StringBuffer slice_to(StringBuffer s)
+ {
+ slice_check();
+ int len = ket - bra;
+ s.replace(0, s.length(), current.substring(bra, ket));
+ return s;
+ }
+
+ /* Copy the slice into the supplied StringBuilder */
+ protected StringBuilder slice_to(StringBuilder s)
+ {
+ slice_check();
+ int len = ket - bra;
+ s.replace(0, s.length(), current.substring(bra, ket));
+ return s;
+ }
+
+ protected StringBuffer assign_to(StringBuffer s)
+ {
+ s.replace(0, s.length(), current.substring(0, limit));
+ return s;
+ }
+
+ protected StringBuilder assign_to(StringBuilder s)
+ {
+ s.replace(0, s.length(), current.substring(0, limit));
+ return s;
+ }
+
+/*
+extern void debug(struct SN_env * z, int number, int line_count)
+{ int i;
+ int limit = SIZE(z->p);
+ //if (number >= 0) printf("%3d (line %4d): '", number, line_count);
+ if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+ for (i = 0; i <= limit; i++)
+ { if (z->lb == i) printf("{");
+ if (z->bra == i) printf("[");
+ if (z->c == i) printf("|");
+ if (z->ket == i) printf("]");
+ if (z->l == i) printf("}");
+ if (i < limit)
+ { int ch = z->p[i];
+ if (ch == 0) ch = '#';
+ printf("%c", ch);
+ }
+ }
+ printf("'\n");
+}
+*/
+
+};
diff --git a/contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java b/contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java
new file mode 100644
index 000000000..960bd55f6
--- /dev/null
+++ b/contrib/snowball/java/org/tartarus/snowball/SnowballStemmer.java
@@ -0,0 +1,7 @@
+
+package org.tartarus.snowball;
+import java.lang.reflect.InvocationTargetException;
+
+/**
+ * A {@link SnowballProgram} that exposes one entry point, {@link #stem()},
+ * to be supplied by each concrete subclass.
+ */
+public abstract class SnowballStemmer extends SnowballProgram {
+
+    /**
+     * Runs the subclass's algorithm on the current string.
+     *
+     * @return a flag whose meaning is defined by the implementing subclass
+     */
+    public abstract boolean stem();
+
+}
diff --git a/contrib/snowball/java/org/tartarus/snowball/TestApp.java b/contrib/snowball/java/org/tartarus/snowball/TestApp.java
new file mode 100644
index 000000000..38803f673
--- /dev/null
+++ b/contrib/snowball/java/org/tartarus/snowball/TestApp.java
@@ -0,0 +1,77 @@
+
+package org.tartarus.snowball;
+
+import java.lang.reflect.Method;
+import java.io.Reader;
+import java.io.Writer;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.OutputStream;
+import java.io.FileOutputStream;
+
+public class TestApp {
+ private static void usage()
+ {
+ System.err.println("Usage: TestApp <algorithm> <input file> [-o <output file>]");
+ }
+
+ public static void main(String [] args) throws Throwable {
+ if (args.length < 2) {
+ usage();
+ return;
+ }
+
+ Class stemClass = Class.forName("org.tartarus.snowball.ext." +
+ args[0] + "Stemmer");
+ SnowballStemmer stemmer = (SnowballStemmer) stemClass.newInstance();
+
+ Reader reader;
+ reader = new InputStreamReader(new FileInputStream(args[1]));
+ reader = new BufferedReader(reader);
+
+ StringBuffer input = new StringBuffer();
+
+ OutputStream outstream;
+
+ if (args.length > 2) {
+ if (args.length >= 4 && args[2].equals("-o")) {
+ outstream = new FileOutputStream(args[3]);
+ } else {
+ usage();
+ return;
+ }
+ } else {
+ outstream = System.out;
+ }
+ Writer output = new OutputStreamWriter(outstream);
+ output = new BufferedWriter(output);
+
+ int repeat = 1;
+ if (args.length > 4) {
+ repeat = Integer.parseInt(args[4]);
+ }
+
+ Object [] emptyArgs = new Object[0];
+ int character;
+ while ((character = reader.read()) != -1) {
+ char ch = (char) character;
+ if (Character.isWhitespace((char) ch)) {
+ if (input.length() > 0) {
+ stemmer.setCurrent(input.toString());
+ for (int i = repeat; i != 0; i--) {
+ stemmer.stem();
+ }
+ output.write(stemmer.getCurrent());
+ output.write('\n');
+ input.delete(0, input.length());
+ }
+ } else {
+ input.append(Character.toLowerCase(ch));
+ }
+ }
+ output.flush();
+ }
+}