選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

mkmodules.pl 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. #!/usr/bin/env perl
  2. use strict;
  3. use 5.006;
  4. use warnings;
  # Script name, echoed in the usage message.
  my $progname = $0;

  # Four positional arguments are mandatory and a fifth (an encoding filter)
  # is optional.  Any other count prints the usage text and exits with 1.
  unless (@ARGV == 4 || @ARGV == 5) {
      print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<enc>]\n";
      exit 1;
  }

  # Positional arguments, removed from @ARGV in order.
  my ($outname, $c_src_dir, $descfile, $srclistfile) = splice(@ARGV, 0, 4);

  # Optional encoding filter, and the file-name suffix derived from it.
  my $enc_only;
  my $extn = '';
  if (@ARGV) {
      $enc_only = shift @ARGV;
      $extn     = "_$enc_only";
  }

  # Tables filled in by readinput()/addalgenc() and read by the printers.
  my %aliases;           # alias name     => algorithm name
  my %algorithms;        # algorithm name => 1
  my %algorithm_encs;    # algorithm name => { encoding name => 1 }
  my %encs;              # encoding name  => 1
  # addalgenc($alg, $enc)
  #
  # Record that algorithm $alg is available in encoding $enc, updating both
  # %algorithm_encs and %encs.  When an encoding filter ($enc_only) is set,
  # the pair is ignored unless $enc, lower-cased and with its underscores
  # removed, is equal to the filter.
  sub addalgenc($$) {
      my ($alg, $enc) = @_;

      if (defined $enc_only) {
          # Compare in normalised form: lower case, no underscores.
          (my $folded = lc $enc) =~ tr/_//d;
          return unless $folded eq $enc_only;
      }

      # The per-algorithm hash is created on first use by autovivification.
      $algorithm_encs{$alg}{$enc} = 1;
      $encs{$enc} = 1;
  }
  # readinput()
  #
  # Parse the modules description file named by $descfile.  Lines that are
  # blank or whose first non-blank character is '#' are skipped.  Every other
  # line must hold three whitespace-separated columns:
  #
  #     <algorithm>  <enc>[,<enc>...]  <alias>[,<alias>...]
  #
  # For each such line the algorithm is recorded in %algorithms, every alias
  # is mapped to the algorithm in %aliases, and every encoding is passed to
  # addalgenc().
  #
  # Dies if the file cannot be opened or closed, or if a line has fewer than
  # three columns.
  sub readinput()
  {
      # Three-argument open with a lexical handle: the file name is never
      # interpreted as a mode, and a failure is reported instead of silently
      # producing empty output.
      open(my $desc, '<', $descfile)
          or die "Can't open modules description file `$descfile': $!\n";

      while (my $line = <$desc>) {
          next if $line =~ m/^\s*#/;
          next if $line =~ m/^\s*$/;

          # split ' ' discards leading whitespace, so an indented line does
          # not produce an empty first column.
          my ($alg, $encstr, $aliasstr) = split(' ', $line);
          unless (defined $aliasstr) {
              die "${descfile}:$.: expected `<algorithm> <encodings> <aliases>'\n";
          }

          $algorithms{$alg} = 1;

          foreach my $alias (split(/,/, $aliasstr)) {
              $aliases{$alias} = $alg;
          }
          foreach my $enc (split(/,/, $encstr)) {
              addalgenc($alg, $enc);
          }
      }

      close($desc) or die "Can't close ${descfile}: $!\n";
  }
  # printoutput()
  #
  # Write the generated C header to $outname.  The file contains, in order:
  #   - a comment banner listing every algorithm in %algorithms,
  #   - one #include per (algorithm, encoding) pair,
  #   - the stemmer_encoding_t enum and the encodings[] name table,
  #   - the modules[] table with one row per (alias, encoding) pair,
  #   - the algorithm_names[] list.
  #
  # Dies if the output file cannot be opened or closed.
  sub printoutput()
  {
      # Three-argument open keeps the file name from being parsed as part of
      # the mode; the lexical handle is private to this sub.
      open(my $out, '>', $outname)
          or die "Can't open output file `$outname': $!\n";

      print {$out} <<EOS;
/* $outname: List of stemming modules.
*
* This file is generated by mkmodules.pl from a list of module names.
* Do not edit manually.
*
EOS

      # List the algorithm names, starting a new comment line whenever
      # appending the next name would take the line past 77 characters.
      my $line = " * Modules included by this file are: ";
      print {$out} $line;
      my $linelen  = length($line);
      my $need_sep = 0;
      my @algorithms = sort keys(%algorithms);
      foreach my $lang (@algorithms) {
          if ($need_sep) {
              if (($linelen + 2 + length($lang)) > 77) {
                  print {$out} ",\n * ";
                  $linelen = 3;
              } else {
                  print {$out} ', ';
                  $linelen += 2;
              }
          }
          print {$out} $lang;
          $linelen += length($lang);
          $need_sep = 1;
      }
      print {$out} "\n */\n\n";

      # One #include per algorithm/encoding pair.  The "|| {}" fallback
      # avoids creating empty entries in %algorithm_encs for algorithms that
      # have no encodings recorded.
      foreach my $lang (@algorithms) {
          my $encs_for = $algorithm_encs{$lang} || {};
          foreach my $enc (sort keys %$encs_for) {
              print {$out} "#include \"../$c_src_dir/stem_${enc}_$lang.h\"\n";
          }
      }

      print {$out} <<EOS;
typedef enum {
ENC_UNKNOWN=0,
EOS
      my $neednl = 0;
      foreach my $enc (sort keys %encs) {
          print {$out} ",\n" if $neednl;
          print {$out} " ENC_${enc}";
          $neednl = 1;
      }
      # Terminate the last enumerator line so the closing brace starts on a
      # line of its own.
      print {$out} "\n" if $neednl;

      print {$out} <<EOS;
} stemmer_encoding_t;
struct stemmer_encoding {
const char * name;
stemmer_encoding_t enc;
};
static const struct stemmer_encoding encodings[] = {
EOS
      foreach my $enc (sort keys %encs) {
          print {$out} " {\"${enc}\", ENC_${enc}},\n";
      }

      print {$out} <<EOS;
{0,ENC_UNKNOWN}
};
struct stemmer_modules {
const char * name;
stemmer_encoding_t enc;
struct SN_env * (*create)(void);
void (*close)(struct SN_env *);
int (*stem)(struct SN_env *);
};
static const struct stemmer_modules modules[] = {
EOS
      # One row per alias and encoding; the function names are built from
      # the algorithm the alias refers to, not from the alias itself.
      foreach my $alias (sort keys %aliases) {
          my $alg      = $aliases{$alias};
          my $encs_for = $algorithm_encs{$alg} || {};
          foreach my $enc (sort keys %$encs_for) {
              my $prefix = "${alg}_${enc}";
              print {$out} " {\"$alias\", ENC_$enc, ${prefix}_create_env, ${prefix}_close_env, ${prefix}_stem},\n";
          }
      }

      print {$out} <<EOS;
{0,ENC_UNKNOWN,0,0,0}
};
static const char * algorithm_names[] = {
EOS
      foreach my $lang (@algorithms) {
          print {$out} " \"$lang\", \n";
      }
      print {$out} <<EOS;
0
};
EOS

      close($out) or die "Can't close ${outname}: $!\n";
  }
  # printsrclist()
  #
  # Write the source-list file to $srclistfile.  After a comment banner that
  # lists every algorithm in %algorithms, it defines two variables:
  #   snowball_sources - the generated stemmer .c files plus the runtime and
  #                      libstemmer sources,
  #   snowball_headers - the generated stemmer .h files plus the public,
  #                      modules and runtime headers.
  #
  # Dies if the output file cannot be opened or closed.
  sub printsrclist()
  {
      # Three-argument open keeps the file name from being parsed as part of
      # the mode; the lexical handle is private to this sub.
      open(my $out, '>', $srclistfile)
          or die "Can't open output file `$srclistfile': $!\n";

      print {$out} <<EOS;
# $srclistfile: List of stemming module source files
#
# This file is generated by mkmodules.pl from a list of module names.
# Do not edit manually.
#
EOS

      # List the algorithm names, starting a new comment line whenever
      # appending the next name would take the line past 77 characters.
      my $line = "# Modules included by this file are: ";
      print {$out} $line;
      my $linelen  = length($line);
      my $need_sep = 0;
      my @algorithms = sort keys(%algorithms);
      foreach my $lang (@algorithms) {
          if ($need_sep) {
              if (($linelen + 2 + length($lang)) > 77) {
                  print {$out} ",\n# ";
                  $linelen = 3;
              } else {
                  print {$out} ', ';
                  $linelen += 2;
              }
          }
          print {$out} $lang;
          $linelen += length($lang);
          $need_sep = 1;
      }

      # Generated files exist per algorithm, and %algorithm_encs is keyed by
      # algorithm name, so iterate the algorithms rather than the aliases.
      # Looking the table up by alias would drop any algorithm that is not
      # listed among its own aliases.
      print {$out} "\n\nsnowball_sources= \\\n";
      foreach my $lang (@algorithms) {
          my $encs_for = $algorithm_encs{$lang} || {};
          foreach my $enc (sort keys %$encs_for) {
              print {$out} " src_c/stem_${enc}_${lang}.c \\\n";
          }
      }
      # Fixed sources; every entry but the last ends in a continuation.
      my @fixed_sources = (
          'runtime/api.c',
          'runtime/utilities.c',
          "libstemmer/libstemmer${extn}.c",
      );
      print {$out} join(" \\\n", map { " $_" } @fixed_sources);

      print {$out} "\n\nsnowball_headers= \\\n";
      foreach my $lang (@algorithms) {
          my $encs_for = $algorithm_encs{$lang} || {};
          foreach my $enc (sort keys %$encs_for) {
              print {$out} " src_c/stem_${enc}_${lang}.h \\\n";
          }
      }
      # Fixed headers; every entry but the last ends in a continuation.
      my @fixed_headers = (
          'include/libstemmer.h',
          "libstemmer/modules${extn}.h",
          'runtime/api.h',
          'runtime/header.h',
      );
      print {$out} join(" \\\n", map { " $_" } @fixed_headers);

      print {$out} "\n\n";
      close($out) or die "Can't close ${srclistfile}: $!\n";
  }
  # Main sequence: parse the description file, then write the C module
  # table and the source-list file, in that order.
  for my $step (\&readinput, \&printoutput, \&printsrclist) {
      $step->();
  }