Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

libstemmer.h 2.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. /* Give the declarations below C linkage when this header is included from C++. */
  2. #ifdef __cplusplus
  3. extern "C" {
  4. #endif
  5. struct sb_stemmer; /* Declared but not defined in the visible part of this header; used only through pointers below. */
  6. typedef unsigned char sb_symbol; /* Element type of the word passed to, and the result returned by, sb_stemmer_stem(). */
  7. /* FIXME - should be able to get a version number for each stemming
  8. * algorithm (which will be incremented each time the output changes). */
  9. /** Returns an array of the names of the available stemming algorithms.
  10. * Note that these are the canonical names - aliases (i.e. other names for
  11. * the same algorithm) will not be included in the list.
  12. * The list is terminated with a null pointer.
  13. * @return The null-pointer-terminated array of canonical algorithm names.
  14. * The list must not be modified in any way.
  15. */
  16. const char ** sb_stemmer_list(void);
  17. /** Create a new stemmer object, using the specified algorithm, for the
  18. * specified character encoding.
  19. *
  20. * All algorithms will usually be available in UTF-8, but may also be
  21. * available in other character encodings.
  22. *
  23. * @param algorithm The algorithm name. This is either the English
  24. * name of the algorithm, or the 2- or 3-letter ISO 639 code for the
  25. * language. Note that case is significant in this parameter - the
  26. * value should be supplied in lower case.
  27. *
  28. * @param charenc The character encoding. NULL may be passed as
  29. * this value, in which case UTF-8 encoding will be assumed. Otherwise,
  30. * the argument may be one of "UTF_8", "ISO_8859_1" (i.e. Latin 1),
  31. * "ISO_8859_2" (i.e. Latin 2) or "KOI8_R" (Russian). Note that case is
  32. * significant in this parameter.
  33. *
  34. * @return NULL if the specified algorithm is not recognised, or the
  35. * algorithm is not available for the requested encoding. Otherwise,
  36. * returns a pointer to a newly created stemmer for the requested algorithm.
  37. * The returned pointer must be deleted by calling sb_stemmer_delete().
  38. *
  39. * @note NULL will also be returned if an out-of-memory error occurs.
  40. */
  41. struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);
  42. /** Delete a stemmer object.
  43. *
  44. * This frees all resources allocated for the stemmer. After calling
  45. * this function, the supplied stemmer must not be used in any way.
  46. *
  47. * It is safe to pass a null pointer to this function - this will have
  48. * no effect.
  49. */
  50. void sb_stemmer_delete(struct sb_stemmer * stemmer);
  51. /** Stem a word.
  52. *
  53. * The return value is owned by the stemmer - it must not be freed or
  54. * modified, and it will become invalid when the stemmer is called again,
  55. * or if the stemmer is deleted with sb_stemmer_delete().
  56. *
  57. * The length of the return value can be obtained using sb_stemmer_length().
  58. *
  59. * If an out-of-memory error occurs, this will return NULL.
  60. */
  61. const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer,
  62. const sb_symbol * word, int size); /* NOTE(review): size is presumably the length of word in sb_symbol units - confirm against the implementation. */
  63. /** Get the length of the result of the most recently stemmed word.
  64. * This should not be called before sb_stemmer_stem() has been called on this stemmer.
  65. */
  66. int sb_stemmer_length(struct sb_stemmer * stemmer);
  67. #ifdef __cplusplus
  68. } /* Closes the extern "C" block opened at the top of the header. */
  69. #endif