#!/usr/bin/env perl
#
# Generate the table of stemming modules (a C header) and the matching
# makefile source list from a modules description file.
#
# Usage:
#   <progname> <outfile> <C source directory> <modules description file>
#              <source list file> [<enc>]
#
# Each non-blank, non-comment line of the description file holds three
# whitespace-separated fields:
#
#   <algorithm>  <enc>[,<enc>...]  <alias>[,<alias>...]
#
# When the optional <enc> argument is given, only encodings whose name,
# lower-cased and with underscores removed, equals <enc> are kept, and
# "_<enc>" is appended to the libstemmer/modules file names written to the
# source list.

use strict;
use 5.006;
use warnings;

my $progname = $0;

if (scalar @ARGV < 4 || scalar @ARGV > 5) {
    print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<enc>]\n";
    exit 1;
}

my $outname     = shift(@ARGV);
my $c_src_dir   = shift(@ARGV);
my $descfile    = shift(@ARGV);
my $srclistfile = shift(@ARGV);
my $enc_only;
my $extn = '';
if (@ARGV) {
    $enc_only = shift(@ARGV);
    $extn     = '_' . $enc_only;
}

my %aliases        = ();    # alias name     => algorithm name
my %algorithms     = ();    # algorithm name => 1
my %algorithm_encs = ();    # algorithm name => { encoding name => 1 }
my %encs           = ();    # encoding name  => 1

# Record that algorithm $alg is available in encoding $enc.
#
# If an encoding filter was given on the command line, encodings that do not
# match it are ignored.
sub addalgenc {
    my ($alg, $enc) = @_;

    if (defined $enc_only) {
        my $norm_enc = lc $enc;
        $norm_enc =~ s/_//g;
        return if $norm_enc ne $enc_only;
    }

    $algorithm_encs{$alg}{$enc} = 1;
    $encs{$enc} = 1;
    return;
}

# Return the sorted encoding names recorded for algorithm $alg, or an empty
# list if none were recorded (for example because the encoding filter
# removed them all).
sub _encodings_of {
    my ($alg) = @_;

    my $enc_set = $algorithm_encs{$alg};
    return () unless defined $enc_set;
    return sort keys %$enc_set;
}

# Build a comma-separated list of @names that starts with $lead_in and is
# wrapped so that lines stay within 77 columns; wrapped lines start with
# $continuation.
sub _wrapped_name_list {
    my ($lead_in, $continuation, @names) = @_;

    my $text     = $lead_in;
    my $linelen  = length($lead_in);
    my $need_sep = 0;
    foreach my $name (@names) {
        if ($need_sep) {
            if (($linelen + 2 + length($name)) > 77) {
                $text .= ",\n" . $continuation;
                # The width of a continuation prefix is counted as 3 columns
                # for both output files.
                $linelen = 3;
            }
            else {
                $text .= ', ';
                $linelen += 2;
            }
        }
        $text .= $name;
        $linelen += length($name);
        $need_sep = 1;
    }
    return $text;
}

# Read the modules description file and fill %algorithms, %aliases,
# %algorithm_encs and %encs.
#
# Dies if the file cannot be opened or closed, or if a line does not have
# the three expected fields.
sub readinput {
    open(my $desc_fh, '<', $descfile)
        or die "Can't open modules description file `$descfile': $!\n";

    while (my $line = <$desc_fh>) {
        next if $line =~ m/^\s*#/;
        next if $line =~ m/^\s*$/;

        # split(' ') ignores leading whitespace, so an indented line does not
        # produce an empty first field.
        my ($alg, $encstr, $aliases) = split(' ', $line);
        unless (defined $aliases) {
            die "Malformed line " . $. . " in `$descfile': "
              . "expected <algorithm> <encodings> <aliases>\n";
        }

        $algorithms{$alg} = 1;
        foreach my $alias (split(/,/, $aliases)) {
            foreach my $enc (split(/,/, $encstr)) {
                $aliases{$alias} = $alg;
                addalgenc($alg, $enc);
            }
        }
    }

    close($desc_fh)
        or die "Can't close ${descfile}: $!\n";
    return;
}

# Write the C header listing the encodings, the module table (one entry per
# alias and encoding) and the algorithm names to $outname.
#
# Dies if the output file cannot be opened or closed.
sub printoutput {
    open(my $out, '>', $outname)
        or die "Can't open output file `$outname': $!\n";

    my @algorithms = sort keys(%algorithms);
    my @enc_names  = sort keys %encs;

    print {$out} <<EOS;
/* $outname: List of stemming modules.
 *
 * This file is generated by mkmodules.pl from a list of module names.
 * Do not edit manually.
 *
EOS

    print {$out} _wrapped_name_list(" * Modules included by this file are: ",
                                    " * ", @algorithms);
    print {$out} "\n */\n\n";

    foreach my $lang (@algorithms) {
        foreach my $enc (_encodings_of($lang)) {
            print {$out} "#include \"../$c_src_dir/stem_${enc}_$lang.h\"\n";
        }
    }

    print {$out} <<EOS;

typedef enum {
 ENC_UNKNOWN=0,
EOS

    # Entries are separated by ",\n"; the last one gets no trailing newline,
    # which is supplied by the first line of the text that follows.
    print {$out} join(",\n", map { " ENC_${_}" } @enc_names);

    print {$out} <<EOS;

} stemmer_encoding_t;

struct stemmer_encoding {
 const char * name;
 stemmer_encoding_t enc;
};
static const struct stemmer_encoding encodings[] = {
EOS

    foreach my $enc (@enc_names) {
        print {$out} " {\"${enc}\", ENC_${enc}},\n";
    }

    print {$out} <<EOS;
 {0,ENC_UNKNOWN}
};

struct stemmer_modules {
 const char * name;
 stemmer_encoding_t enc;
 struct SN_env * (*create)(void);
 void (*close)(struct SN_env *);
 int (*stem)(struct SN_env *);
};
static const struct stemmer_modules modules[] = {
EOS

    foreach my $alias (sort keys %aliases) {
        my $alg = $aliases{$alias};
        foreach my $enc (_encodings_of($alg)) {
            my $prefix = "${alg}_${enc}";
            print {$out} " {\"$alias\", ENC_$enc, ${prefix}_create_env, ${prefix}_close_env, ${prefix}_stem},\n";
        }
    }

    print {$out} <<EOS;
 {0,ENC_UNKNOWN,0,0,0}
};
EOS

    print {$out} <<EOS;
static const char * algorithm_names[] = {
EOS

    foreach my $lang (@algorithms) {
        print {$out} " \"$lang\", \n";
    }

    print {$out} <<EOS;
 0
};
EOS

    close($out)
        or die "Can't close ${outname}: $!\n";
    return;
}

# Write the makefile fragment defining snowball_sources and snowball_headers
# to $srclistfile.
#
# Dies if the output file cannot be opened or closed.
sub printsrclist {
    open(my $out, '>', $srclistfile)
        or die "Can't open output file `$srclistfile': $!\n";

    my @algorithms = sort keys(%algorithms);

    print {$out} <<EOS;
# $srclistfile: List of stemming module source files
#
# This file is generated by mkmodules.pl from a list of module names.
# Do not edit manually.
#
EOS

    print {$out} _wrapped_name_list("# Modules included by this file are: ",
                                    "# ", @algorithms);

    # The per-algorithm files are listed by algorithm name, which is how
    # %algorithm_encs is keyed.  Walking the alias names instead would drop
    # the files of any algorithm that is not also listed as its own alias,
    # although the generated header still includes and references them.
    print {$out} "\n\nsnowball_sources= \\\n";
    foreach my $lang (@algorithms) {
        foreach my $enc (_encodings_of($lang)) {
            print {$out} " src_c/stem_${enc}_${lang}.c \\\n";
        }
    }
    print {$out} join(" \\\n",
                      map { " $_" } ('runtime/api.c',
                                     'runtime/utilities.c',
                                     "libstemmer/libstemmer${extn}.c"));

    print {$out} "\n\nsnowball_headers= \\\n";
    foreach my $lang (@algorithms) {
        foreach my $enc (_encodings_of($lang)) {
            print {$out} " src_c/stem_${enc}_${lang}.h \\\n";
        }
    }
    print {$out} join(" \\\n",
                      map { " $_" } ('include/libstemmer.h',
                                     "libstemmer/modules${extn}.h",
                                     'runtime/api.h',
                                     'runtime/header.h'));

    print {$out} "\n\n";

    close($out)
        or die "Can't close ${srclistfile}: $!\n";
    return;
}

readinput();
printoutput();
printsrclist();