You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

GNUmakefile 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. # -*- makefile -*-
  # Output locations for generated C and Java sources.
c_src_dir = src_c
java_src_main_dir = java/org/tartarus/snowball
java_src_dir = $(java_src_main_dir)/ext

# Algorithms that are part of libstemmer: a UTF-8 C stemmer and a Java
# stemmer source are derived from each name in this list.
libstemmer_algorithms = \
    danish dutch english finnish french german hungarian italian \
    norwegian porter portuguese romanian russian spanish swedish turkish

# Algorithms for which a stemmer in the named 8-bit encoding is also generated.
KOI8_R_algorithms = russian
ISO_8859_1_algorithms = \
    danish dutch english finnish french german italian \
    norwegian porter portuguese spanish swedish
ISO_8859_2_algorithms = hungarian romanian

# Algorithms that are generated and compiled but not archived into libstemmer.o.
other_algorithms = german2 kraaij_pohlmann lovins

all_algorithms = $(libstemmer_algorithms) $(other_algorithms)
  # Hand-written sources and headers of the snowball compiler.
COMPILER_SOURCES = $(addprefix compiler/, \
    space.c tokeniser.c analyser.c generator.c driver.c generator_java.c)
COMPILER_HEADERS = $(addprefix compiler/, header.h syswords.h syswords2.h)

# C runtime that the generated stemmers are archived together with.
RUNTIME_SOURCES = $(addprefix runtime/, api.c utilities.c)
RUNTIME_HEADERS = $(addprefix runtime/, api.h header.h)

# Hand-written Java sources shipped in the Java distribution.
JAVARUNTIME_SOURCES = $(addprefix java/org/tartarus/snowball/, \
    Among.java SnowballProgram.java SnowballStemmer.java TestApp.java)

# libstemmer wrapper: the two .c files are produced from libstemmer_c.in,
# and the modules*.h headers by libstemmer/mkmodules.pl.
LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
LIBSTEMMER_HEADERS = \
    include/libstemmer.h \
    libstemmer/modules.h \
    libstemmer/modules_utf8.h
LIBSTEMMER_EXTRA = \
    libstemmer/modules.txt \
    libstemmer/modules_utf8.txt \
    libstemmer/libstemmer_c.in

# Example command-line program linked against libstemmer.o.
STEMWORDS_SOURCES = examples/stemwords.c
  # Glob patterns for every algorithm's .sbl sources; make expands the
# wildcards where this list is used as a prerequisite list.
ALL_ALGORITHM_FILES = $(addsuffix /stem*.sbl,$(addprefix algorithms/,$(all_algorithms)))

# Generated C sources for the stemmers that go into libstemmer, one group
# per encoding; the headers are the same names with a .h suffix.
C_LIB_SOURCES = \
    $(patsubst %,$(c_src_dir)/stem_UTF_8_%.c,$(libstemmer_algorithms)) \
    $(patsubst %,$(c_src_dir)/stem_KOI8_R_%.c,$(KOI8_R_algorithms)) \
    $(patsubst %,$(c_src_dir)/stem_ISO_8859_1_%.c,$(ISO_8859_1_algorithms)) \
    $(patsubst %,$(c_src_dir)/stem_ISO_8859_2_%.c,$(ISO_8859_2_algorithms))
C_LIB_HEADERS = $(C_LIB_SOURCES:.c=.h)

# Generated UTF-8 C sources for the algorithms outside libstemmer.
C_OTHER_SOURCES = $(patsubst %,$(c_src_dir)/stem_UTF_8_%.c,$(other_algorithms))
C_OTHER_HEADERS = $(C_OTHER_SOURCES:.c=.h)

# Generated Java stemmer sources.
JAVA_SOURCES = $(patsubst %,$(java_src_dir)/%Stemmer.java,$(libstemmer_algorithms))

# Build products derived from the source lists.
COMPILER_OBJECTS = $(patsubst %.c,%.o,$(COMPILER_SOURCES))
RUNTIME_OBJECTS = $(patsubst %.c,%.o,$(RUNTIME_SOURCES))
LIBSTEMMER_OBJECTS = $(patsubst %.c,%.o,$(LIBSTEMMER_SOURCES))
LIBSTEMMER_UTF8_OBJECTS = $(patsubst %.c,%.o,$(LIBSTEMMER_UTF8_SOURCES))
STEMWORDS_OBJECTS = $(patsubst %.c,%.o,$(STEMWORDS_SOURCES))
C_LIB_OBJECTS = $(patsubst %.c,%.o,$(C_LIB_SOURCES))
C_OTHER_OBJECTS = $(patsubst %.c,%.o,$(C_OTHER_SOURCES))
JAVA_CLASSES = $(patsubst %.java,%.class,$(JAVA_SOURCES))
JAVA_RUNTIME_CLASSES = $(patsubst %.java,%.class,$(JAVARUNTIME_SOURCES))

# Compiler flags: include path and optimisation level in CFLAGS, warning
# options in CPPFLAGS (both are passed on every compile line).
CFLAGS = -Iinclude -O2
CPPFLAGS = -W -Wall -Wmissing-prototypes -Wmissing-declarations
  # Default goal: the snowball compiler, the libstemmer.o archive, the
# stemwords example, and the generated/compiled "other" algorithms.
# Declared phony so a stray file named `all` cannot mask it.
.PHONY: all
all: snowball libstemmer.o stemwords $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS)
  # Remove every file this makefile generates, the dist/ tree, and the
# generated-C directory if it ends up empty.
# Declared phony so a file named `clean` cannot turn this into a no-op.
.PHONY: clean
clean:
	rm -f $(COMPILER_OBJECTS) $(RUNTIME_OBJECTS) \
	      $(LIBSTEMMER_OBJECTS) $(LIBSTEMMER_UTF8_OBJECTS) $(STEMWORDS_OBJECTS) snowball \
	      libstemmer.o stemwords \
	      libstemmer/modules.h \
	      libstemmer/modules_utf8.h \
	      snowball.splint \
	      $(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
	      $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
	      $(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
	      libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
	      libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c
	rm -rf dist
	# Best effort: the directory may be missing or still hold foreign files.
	rmdir $(c_src_dir) || true
  # Link the snowball compiler from its object files.
# LDFLAGS/LDLIBS are honoured so linker options given on the command line
# or in the environment reach the link step; both are empty by default.
snowball: $(COMPILER_OBJECTS)
	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)

# Every compiler object is rebuilt when any compiler header changes.
$(COMPILER_OBJECTS): $(COMPILER_HEADERS)
  # Instantiate libstemmer_c.in twice, substituting the name of the module
# table header that each variant includes for the @MODULES_H@ placeholder.
libstemmer/libstemmer.c: libstemmer/libstemmer_c.in
	sed 's/@MODULES_H@/modules.h/' $< > $@

libstemmer/libstemmer_utf8.c: libstemmer/libstemmer_c.in
	sed 's/@MODULES_H@/modules_utf8.h/' $< > $@
  # Generate the module table header; the same mkmodules.pl run also writes
# the make fragment libstemmer/mkinc.mak (passed as its fourth argument).
#
# The original listed both files as targets of one rule and passed $@ as
# the header path. Make treats that as two independent rules, so building
# mkinc.mak by itself ran the script with the header path set to mkinc.mak.
# The header is now the only real target and the fragment hangs off it.
libstemmer/modules.h: libstemmer/mkmodules.pl libstemmer/modules.txt
	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules.txt libstemmer/mkinc.mak

# Normally produced as a side effect of the rule above; the recipe only
# regenerates when the fragment is missing although the header is current.
# NOTE(review): assumes mkmodules.pl takes <header> <c_src_dir> <modules.txt> <fragment>,
# as the original command line implies - confirm against the script.
libstemmer/mkinc.mak: libstemmer/modules.h
	@test -f $@ || libstemmer/mkmodules.pl $< $(c_src_dir) libstemmer/modules.txt $@
  # Generate the UTF-8-only module table header; the same mkmodules.pl run
# also writes the make fragment libstemmer/mkinc_utf8.mak (fourth argument).
#
# The original listed both files as targets of one rule and passed $@ as
# the header path. Make treats that as two independent rules, so building
# mkinc_utf8.mak by itself ran the script with the header path set to the
# fragment. The header is now the only real target and the fragment hangs
# off it.
libstemmer/modules_utf8.h: libstemmer/mkmodules.pl libstemmer/modules_utf8.txt
	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules_utf8.txt libstemmer/mkinc_utf8.mak utf8

# Normally produced as a side effect of the rule above; the recipe only
# regenerates when the fragment is missing although the header is current.
# NOTE(review): assumes mkmodules.pl takes <header> <c_src_dir> <modules.txt> <fragment> [utf8],
# as the original command line implies - confirm against the script.
libstemmer/mkinc_utf8.mak: libstemmer/modules_utf8.h
	@test -f $@ || libstemmer/mkmodules.pl $< $(c_src_dir) libstemmer/modules_utf8.txt $@ utf8
  # The wrapper object needs the generated module table and every generated
# stemmer header to exist before it is compiled.
libstemmer/libstemmer.o: \
    libstemmer/modules.h \
    $(C_LIB_HEADERS)

# Despite its .o name, libstemmer.o is an `ar` archive holding the wrapper,
# the runtime and all generated stemmer objects.
libstemmer.o: \
    libstemmer/libstemmer.o \
    $(RUNTIME_OBJECTS) \
    $(C_LIB_OBJECTS)
	$(AR) -cru $@ $^
  # Link the stemwords example program against the libstemmer.o archive.
# LDFLAGS/LDLIBS are honoured so linker options given on the command line
# or in the environment reach the link step; both are empty by default.
stemwords: $(STEMWORDS_OBJECTS) libstemmer.o
	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
  # Fallback for algorithms without their own stem_Unicode.sbl: reuse the
# ISO-8859-1 source unchanged under the Unicode name.
algorithms/%/stem_Unicode.sbl: algorithms/%/stem_ISO_8859_1.sbl
	cp $< $@
  # Generate the UTF-8 C stemmer (.c and .h in one snowball run) for one
# algorithm. $* is the pattern stem, i.e. the algorithm name, so there is
# no need to recover it from $< with a sed pipeline as the original did.
# The command is no longer hidden behind `@` plus a hand-written echo:
# make prints the real command line itself (and stays quiet under -s).
$(c_src_dir)/stem_UTF_8_%.c $(c_src_dir)/stem_UTF_8_%.h: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(c_src_dir)
	./snowball $< -o $(c_src_dir)/stem_UTF_8_$* -eprefix $*_UTF_8_ -r ../runtime -u
  # Generate the KOI8-R C stemmer (.c and .h in one snowball run) for one
# algorithm. $* is the pattern stem, i.e. the algorithm name, so there is
# no need to recover it from $< with a sed pipeline as the original did.
# The command is no longer hidden behind `@` plus a hand-written echo:
# make prints the real command line itself (and stays quiet under -s).
$(c_src_dir)/stem_KOI8_R_%.c $(c_src_dir)/stem_KOI8_R_%.h: algorithms/%/stem_KOI8_R.sbl snowball
	@mkdir -p $(c_src_dir)
	./snowball $< -o $(c_src_dir)/stem_KOI8_R_$* -eprefix $*_KOI8_R_ -r ../runtime
  # Generate the ISO-8859-1 C stemmer (.c and .h in one snowball run) for
# one algorithm. $* is the pattern stem, i.e. the algorithm name, so there
# is no need to recover it from $< with a sed pipeline as the original did.
# The command is no longer hidden behind `@` plus a hand-written echo:
# make prints the real command line itself (and stays quiet under -s).
$(c_src_dir)/stem_ISO_8859_1_%.c $(c_src_dir)/stem_ISO_8859_1_%.h: algorithms/%/stem_ISO_8859_1.sbl snowball
	@mkdir -p $(c_src_dir)
	./snowball $< -o $(c_src_dir)/stem_ISO_8859_1_$* -eprefix $*_ISO_8859_1_ -r ../runtime
  # Generate the ISO-8859-2 C stemmer (.c and .h in one snowball run) for
# one algorithm. $* is the pattern stem, i.e. the algorithm name, so there
# is no need to recover it from $< with a sed pipeline as the original did.
# The command is no longer hidden behind `@` plus a hand-written echo:
# make prints the real command line itself (and stays quiet under -s).
$(c_src_dir)/stem_ISO_8859_2_%.c $(c_src_dir)/stem_ISO_8859_2_%.h: algorithms/%/stem_ISO_8859_2.sbl snowball
	@mkdir -p $(c_src_dir)
	./snowball $< -o $(c_src_dir)/stem_ISO_8859_2_$* -eprefix $*_ISO_8859_2_ -r ../runtime
  # Compile one generated stemmer. The header is listed as a prerequisite
# as well, so a change to either generated file triggers a recompile.
$(c_src_dir)/stem_%.o: \
    $(c_src_dir)/stem_%.c \
    $(c_src_dir)/stem_%.h
	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
  # Generate the Java stemmer class source for one algorithm.
# $* is the pattern stem, i.e. the algorithm name, so there is no need to
# recover it from $< with a sed pipeline as the original did.
# The command is no longer hidden behind `@` plus a hand-written echo:
# make prints the real command line itself (and stays quiet under -s).
$(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(java_src_dir)
	./snowball $< -j -o $(java_src_dir)/$*Stemmer -p "org.tartarus.snowball.SnowballStemmer" -eprefix $*_ -r ../runtime -n $*Stemmer
  # `make splint` is a convenience alias for the report file below; it is
# declared phony so a file named `splint` cannot mask it.
.PHONY: splint
splint: snowball.splint

# Run splint in -weak mode over the compiler sources and keep its output
# as a report file.
snowball.splint: $(COMPILER_SOURCES)
	splint $^ >$@ -weak
  # Make a full source distribution.
# Declared phony: the dist_* recipes create a directory that is also named
# `dist`, and this target must never be mistaken for that directory.
.PHONY: dist
dist: dist_snowball dist_libstemmer_c dist_libstemmer_java
  # Make a distribution of all the sources involved in snowball.
# Every prerequisite is copied into dist/snowball_code/ under its original
# relative path, the tree is packed into dist/snowball_code.tgz, and the
# staging tree is removed again.
# Declared phony: the recipe never creates a file called dist_snowball.
.PHONY: dist_snowball
dist_snowball: $(COMPILER_SOURCES) $(COMPILER_HEADERS) \
               $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
               $(LIBSTEMMER_SOURCES) \
               $(LIBSTEMMER_UTF8_SOURCES) \
               $(LIBSTEMMER_HEADERS) \
               $(LIBSTEMMER_EXTRA) \
               $(ALL_ALGORITHM_FILES) $(STEMWORDS_SOURCES) \
               GNUmakefile README doc/TODO libstemmer/mkmodules.pl
	destname=snowball_code; \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}.tgz && \
	for file in $^; do \
	  dir=`dirname $$file` && \
	  mkdir -p $${dest}/$${dir} && \
	  cp -a $${file} $${dest}/$${dir} || exit 1 ; \
	done && \
	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
	rm -rf $${dest}
  # Make a distribution of all the sources required to compile the C library.
# The recipe stages the files under dist/libstemmer_c/, writes a MANIFEST
# and a small stand-alone Makefile there, packs dist/libstemmer_c.tgz and
# removes the staging tree.
#
# Declared phony: the recipe never creates a file called dist_libstemmer_c.
#
# The recipe lines of the generated Makefile are written with
# printf '\t%s\n' so that they start with a real TAB. The original relied
# on literal whitespace inside the quoted echo strings, which produces a
# Makefile that make rejects ("missing separator") whenever that whitespace
# is not a TAB.
.PHONY: dist_libstemmer_c
dist_libstemmer_c: \
            $(RUNTIME_SOURCES) \
            $(RUNTIME_HEADERS) \
            $(LIBSTEMMER_SOURCES) \
            $(LIBSTEMMER_UTF8_SOURCES) \
            $(LIBSTEMMER_HEADERS) \
            $(LIBSTEMMER_EXTRA) \
            $(C_LIB_SOURCES) \
            $(C_LIB_HEADERS) \
            libstemmer/mkinc.mak \
            libstemmer/mkinc_utf8.mak
	destname=libstemmer_c; \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}.tgz && \
	mkdir -p $${dest} && \
	cp -a doc/libstemmer_c_README $${dest}/README && \
	mkdir -p $${dest}/examples && \
	cp -a examples/stemwords.c $${dest}/examples && \
	mkdir -p $${dest}/$(c_src_dir) && \
	cp -a $(C_LIB_SOURCES) $(C_LIB_HEADERS) $${dest}/$(c_src_dir) && \
	mkdir -p $${dest}/runtime && \
	cp -a $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) $${dest}/runtime && \
	mkdir -p $${dest}/libstemmer && \
	cp -a $(LIBSTEMMER_SOURCES) $(LIBSTEMMER_UTF8_SOURCES) $(LIBSTEMMER_HEADERS) $(LIBSTEMMER_EXTRA) $${dest}/libstemmer && \
	mkdir -p $${dest}/include && \
	mv $${dest}/libstemmer/libstemmer.h $${dest}/include && \
	(cd $${dest} && \
	 echo "README" >> MANIFEST && \
	 ls $(c_src_dir)/*.c $(c_src_dir)/*.h >> MANIFEST && \
	 ls runtime/*.c runtime/*.h >> MANIFEST && \
	 ls libstemmer/*.c libstemmer/*.h >> MANIFEST && \
	 ls include/*.h >> MANIFEST) && \
	cp -a libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak $${dest}/ && \
	echo 'include mkinc.mak' >> $${dest}/Makefile && \
	echo 'CFLAGS=-Iinclude' >> $${dest}/Makefile && \
	echo 'all: libstemmer.o stemwords' >> $${dest}/Makefile && \
	echo 'libstemmer.o: $$(snowball_sources:.c=.o)' >> $${dest}/Makefile && \
	printf '\t%s\n' '$$(AR) -cru $$@ $$^' >> $${dest}/Makefile && \
	echo 'stemwords: examples/stemwords.o libstemmer.o' >> $${dest}/Makefile && \
	printf '\t%s\n' '$$(CC) -o $$@ $$^' >> $${dest}/Makefile && \
	echo 'clean:' >> $${dest}/Makefile && \
	printf '\t%s\n' 'rm -f stemwords *.o $(c_src_dir)/*.o runtime/*.o libstemmer/*.o' >> $${dest}/Makefile && \
	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
	rm -rf $${dest}
  # Make a distribution of all the sources required to compile the Java library.
# The recipe stages the README, the generated stemmer sources and the
# hand-written Java runtime sources under dist/libstemmer_java/, writes a
# MANIFEST, packs dist/libstemmer_java.tgz and removes the staging tree.
# Declared phony: the recipe never creates a file called dist_libstemmer_java.
.PHONY: dist_libstemmer_java
dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
            $(LIBSTEMMER_EXTRA) \
            $(JAVA_SOURCES)
	destname=libstemmer_java; \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}.tgz && \
	mkdir -p $${dest} && \
	cp -a doc/libstemmer_java_README $${dest}/README && \
	mkdir -p $${dest}/$(java_src_dir) && \
	cp -a $(JAVA_SOURCES) $${dest}/$(java_src_dir) && \
	mkdir -p $${dest}/$(java_src_main_dir) && \
	cp -a $(JAVARUNTIME_SOURCES) $${dest}/$(java_src_main_dir) && \
	(cd $${dest} && \
	 echo "README" >> MANIFEST && \
	 ls $(java_src_dir)/*.java >> MANIFEST && \
	 ls $(java_src_main_dir)/*.java >> MANIFEST) && \
	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
	rm -rf $${dest}
  # Regression-test entry points: `check` runs every encoding, and each
# check_<encoding> target fans out to one check_<encoding>_<algorithm>
# target per algorithm available in that encoding.
#
# Only these aggregate names are declared phony. The per-algorithm targets
# are produced by pattern rules, and make skips implicit-rule search for
# phony targets, so they must not be listed here.
.PHONY: check check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r
check: check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r
check_utf8: $(libstemmer_algorithms:%=check_utf8_%)
check_iso_8859_1: $(ISO_8859_1_algorithms:%=check_iso_8859_1_%)
check_iso_8859_2: $(ISO_8859_2_algorithms:%=check_iso_8859_2_%)
check_koi8r: $(KOI8_R_algorithms:%=check_koi8r_%)
  # Location of the stemmer test data, one sub-directory per algorithm.
# The default assumes the data repository is checked out next to this one;
# override on the command line (make STEMMING_DATA=/path) if it is not.
STEMMING_DATA = ../snowball-data
  # Check one algorithm's UTF-8 stemmer against its reference data.
# $< is the algorithm's data directory and $* the algorithm name (the
# pattern stem), which replaces the original `echo $< | sed` pipelines.
#
# stemwords output goes to a scratch file named after the target instead
# of a shared tmp.txt, so checks running concurrently under `make -j` do
# not overwrite each other's output.
#
# When the data directory also contains diffs.txt, stemwords is run again
# with -p2 and that output is compared against diffs.txt.
check_utf8_%: $(STEMMING_DATA)/% stemwords
	@echo "Checking output of $* stemmer with UTF-8"
	@./stemwords -c UTF_8 -l $* -i $</voc.txt -o tmp_$@.txt
	@diff -u $</output.txt tmp_$@.txt
	@if [ -e $</diffs.txt ] ; \
	then \
	  ./stemwords -c UTF_8 -l $* -i $</voc.txt -o tmp_$@.txt -p2 && \
	  diff -u $</diffs.txt tmp_$@.txt; \
	fi
	@rm tmp_$@.txt
  # Check one algorithm's ISO-8859-1 stemmer against its reference data.
# The reference files are stored as UTF-8, so both the vocabulary and the
# expected output are transcoded before use.
#
# Transcoding uses iconv: the original `python -c` one-liner called
# str.decode(), which exists only in Python 2 and fails under Python 3.
# $* is the algorithm name (the pattern stem), and the scratch file is
# named after the target so concurrent checks under `make -j` do not share
# a single tmp.txt.
check_iso_8859_1_%: $(STEMMING_DATA)/% stemwords
	@echo "Checking output of $* stemmer with ISO_8859_1"
	@iconv -f UTF-8 -t ISO-8859-1 $</voc.txt | \
	    ./stemwords -c ISO_8859_1 -l $* -o tmp_$@.txt
	@iconv -f UTF-8 -t ISO-8859-1 $</output.txt | \
	    diff -u - tmp_$@.txt
	@rm tmp_$@.txt
  # Check one algorithm's ISO-8859-2 stemmer against its reference data.
# The reference files are stored as UTF-8, so both the vocabulary and the
# expected output are transcoded before use.
#
# Transcoding uses iconv: the original `python -c` one-liner called
# str.decode(), which exists only in Python 2 and fails under Python 3.
# $* is the algorithm name (the pattern stem), and the scratch file is
# named after the target so concurrent checks under `make -j` do not share
# a single tmp.txt.
check_iso_8859_2_%: $(STEMMING_DATA)/% stemwords
	@echo "Checking output of $* stemmer with ISO_8859_2"
	@iconv -f UTF-8 -t ISO-8859-2 $</voc.txt | \
	    ./stemwords -c ISO_8859_2 -l $* -o tmp_$@.txt
	@iconv -f UTF-8 -t ISO-8859-2 $</output.txt | \
	    diff -u - tmp_$@.txt
	@rm tmp_$@.txt
  # Check one algorithm's KOI8-R stemmer against its reference data.
# The reference files are stored as UTF-8, so both the vocabulary and the
# expected output are transcoded before use.
#
# Transcoding uses iconv: the original `python -c` one-liner called
# str.decode(), which exists only in Python 2 and fails under Python 3.
# $* is the algorithm name (the pattern stem), and the scratch file is
# named after the target so concurrent checks under `make -j` do not share
# a single tmp.txt.
check_koi8r_%: $(STEMMING_DATA)/% stemwords
	@echo "Checking output of $* stemmer with KOI8R"
	@iconv -f UTF-8 -t KOI8-R $</voc.txt | \
	    ./stemwords -c KOI8_R -l $* -o tmp_$@.txt
	@iconv -f UTF-8 -t KOI8-R $</output.txt | \
	    diff -u - tmp_$@.txt
	@rm tmp_$@.txt