diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-02-25 09:55:31 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-02-25 09:55:31 +0000 |
commit | b87995255fa2ef0de97d509b8cd27860f014e90f (patch) | |
tree | ff7fcc84aa85fcd4cd129d94f6fb23ac5f91d4cb /contrib/snowball/libstemmer/mkmodules.pl | |
parent | 52154a6c1dd7e46c174d4aab782494b92f955df5 (diff) | |
download | rspamd-b87995255fa2ef0de97d509b8cd27860f014e90f.tar.gz rspamd-b87995255fa2ef0de97d509b8cd27860f014e90f.zip |
[Rework] Update snowball stemmer to 2.0 and remove all crap aside of UTF8
Diffstat (limited to 'contrib/snowball/libstemmer/mkmodules.pl')
-rwxr-xr-x | contrib/snowball/libstemmer/mkmodules.pl | 25 |
1 files changed, 18 insertions, 7 deletions
diff --git a/contrib/snowball/libstemmer/mkmodules.pl b/contrib/snowball/libstemmer/mkmodules.pl
index ff8c19e7c..dd6678759 100755
--- a/contrib/snowball/libstemmer/mkmodules.pl
+++ b/contrib/snowball/libstemmer/mkmodules.pl
@@ -1,10 +1,12 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 use strict;
+use 5.006;
+use warnings;
 
 my $progname = $0;
 
 if (scalar @ARGV < 4 || scalar @ARGV > 5) {
-  print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<extn>]\n";
+  print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<enc>]\n";
   exit 1;
 }
 
@@ -12,9 +14,11 @@ my $outname = shift(@ARGV);
 my $c_src_dir = shift(@ARGV);
 my $descfile = shift(@ARGV);
 my $srclistfile = shift(@ARGV);
+my $enc_only;
 my $extn = '';
 if (@ARGV) {
-  $extn = '_'.shift(@ARGV);
+  $enc_only = shift(@ARGV);
+  $extn = '_'.$enc_only;
 }
 
 my %aliases = ();
@@ -27,6 +31,14 @@ sub addalgenc($$) {
   my $alg = shift();
   my $enc = shift();
 
+  if (defined $enc_only) {
+    my $norm_enc = lc $enc;
+    $norm_enc =~ s/_//g;
+    if ($norm_enc ne $enc_only) {
+      return;
+    }
+  }
+
   if (defined $algorithm_encs{$alg}) {
     my $hashref = $algorithm_encs{$alg};
     $$hashref{$enc}=1;
@@ -42,7 +54,7 @@ sub readinput()
 {
   open DESCFILE, $descfile;
   my $line;
-  while($line = <DESCFILE>)
+  while ($line = <DESCFILE>)
   {
     next if $line =~ m/^\s*#/;
     next if $line =~ m/^\s*$/;
@@ -123,7 +135,7 @@ struct stemmer_encoding {
   const char * name;
   stemmer_encoding_t enc;
 };
-static struct stemmer_encoding encodings[] = {
+static const struct stemmer_encoding encodings[] = {
 EOS
   for $enc (sort keys %encs) {
     print OUT " {\"${enc}\", ENC_${enc}},\n";
@@ -139,7 +151,7 @@ struct stemmer_modules {
   void (*close)(struct SN_env *);
   int (*stem)(struct SN_env *);
 };
-static struct stemmer_modules modules[] = {
+static const struct stemmer_modules modules[] = {
 EOS
 
   for $lang (sort keys %aliases) {
@@ -162,7 +174,6 @@ static const char * algorithm_names[] = {
 EOS
 
   for $lang (@algorithms) {
-    my $l = $aliases{$lang};
     print OUT " \"$lang\", \n";
   }
 