about summary refs log tree commit diff stats
path: root/contrib/snowball/libstemmer/mkmodules.pl
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-02-25 09:55:31 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-02-25 09:55:31 +0000
commit b87995255fa2ef0de97d509b8cd27860f014e90f (patch)
tree ff7fcc84aa85fcd4cd129d94f6fb23ac5f91d4cb /contrib/snowball/libstemmer/mkmodules.pl
parent 52154a6c1dd7e46c174d4aab782494b92f955df5 (diff)
download rspamd-b87995255fa2ef0de97d509b8cd27860f014e90f.tar.gz
rspamd-b87995255fa2ef0de97d509b8cd27860f014e90f.zip
[Rework] Update snowball stemmer to 2.0 and remove all crap aside of UTF8
Diffstat (limited to 'contrib/snowball/libstemmer/mkmodules.pl')
-rwxr-xr-x contrib/snowball/libstemmer/mkmodules.pl 25
1 files changed, 18 insertions, 7 deletions
diff --git a/contrib/snowball/libstemmer/mkmodules.pl b/contrib/snowball/libstemmer/mkmodules.pl
index ff8c19e7c..dd6678759 100755
--- a/contrib/snowball/libstemmer/mkmodules.pl
+++ b/contrib/snowball/libstemmer/mkmodules.pl
@@ -1,10 +1,12 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
use strict;
+use 5.006;
+use warnings;
my $progname = $0;
if (scalar @ARGV < 4 || scalar @ARGV > 5) {
- print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<extn>]\n";
+ print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<enc>]\n";
exit 1;
}
@@ -12,9 +14,11 @@ my $outname = shift(@ARGV);
my $c_src_dir = shift(@ARGV);
my $descfile = shift(@ARGV);
my $srclistfile = shift(@ARGV);
+my $enc_only;
my $extn = '';
if (@ARGV) {
- $extn = '_'.shift(@ARGV);
+ $enc_only = shift(@ARGV);
+ $extn = '_'.$enc_only;
}
my %aliases = ();
@@ -27,6 +31,14 @@ sub addalgenc($$) {
my $alg = shift();
my $enc = shift();
+ if (defined $enc_only) {
+ my $norm_enc = lc $enc;
+ $norm_enc =~ s/_//g;
+ if ($norm_enc ne $enc_only) {
+ return;
+ }
+ }
+
if (defined $algorithm_encs{$alg}) {
my $hashref = $algorithm_encs{$alg};
$$hashref{$enc}=1;
@@ -42,7 +54,7 @@ sub readinput()
{
open DESCFILE, $descfile;
my $line;
- while($line = <DESCFILE>)
+ while ($line = <DESCFILE>)
{
next if $line =~ m/^\s*#/;
next if $line =~ m/^\s*$/;
@@ -123,7 +135,7 @@ struct stemmer_encoding {
const char * name;
stemmer_encoding_t enc;
};
-static struct stemmer_encoding encodings[] = {
+static const struct stemmer_encoding encodings[] = {
EOS
for $enc (sort keys %encs) {
print OUT " {\"${enc}\", ENC_${enc}},\n";
@@ -139,7 +151,7 @@ struct stemmer_modules {
void (*close)(struct SN_env *);
int (*stem)(struct SN_env *);
};
-static struct stemmer_modules modules[] = {
+static const struct stemmer_modules modules[] = {
EOS
for $lang (sort keys %aliases) {
@@ -162,7 +174,6 @@ static const char * algorithm_names[] = {
EOS
for $lang (@algorithms) {
- my $l = $aliases{$lang};
print OUT " \"$lang\", \n";
}