You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

chelper.c 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #ifdef BENCHMARK_CHELP
  5. #include <sys/time.h>
  6. #endif
  7. #include "chelper.h"
  8. int NewOnigRegex( char *pattern, int pattern_length, int option,
  9. OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
  10. int ret = ONIG_NORMAL;
  11. int error_msg_len = 0;
  12. OnigUChar *pattern_start = (OnigUChar *) pattern;
  13. OnigUChar *pattern_end = (OnigUChar *) (pattern + pattern_length);
  14. *error_info = (OnigErrorInfo *) malloc(sizeof(OnigErrorInfo));
  15. memset(*error_info, 0, sizeof(OnigErrorInfo));
  16. onig_initialize_encoding(*encoding);
  17. *error_buffer = (char*) malloc(ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
  18. memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
  19. ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info);
  20. if (ret != ONIG_NORMAL) {
  21. error_msg_len = onig_error_code_to_str((unsigned char*)(*error_buffer), ret, *error_info);
  22. if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
  23. error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
  24. }
  25. (*error_buffer)[error_msg_len] = '\0';
  26. }
  27. return ret;
  28. }
  29. int SearchOnigRegex( void *str, int str_length, int offset, int option,
  30. OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
  31. int ret = ONIG_MISMATCH;
  32. int error_msg_len = 0;
  33. OnigRegion *region;
  34. #ifdef BENCHMARK_CHELP
  35. struct timeval tim1, tim2;
  36. long t;
  37. #endif
  38. OnigUChar *str_start = (OnigUChar *) str;
  39. OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
  40. OnigUChar *search_start = (OnigUChar *)(str_start + offset);
  41. OnigUChar *search_end = str_end;
  42. #ifdef BENCHMARK_CHELP
  43. gettimeofday(&tim1, NULL);
  44. #endif
  45. region = onig_region_new();
  46. ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option);
  47. if (ret < 0 && error_buffer != NULL) {
  48. error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info);
  49. if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
  50. error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
  51. }
  52. error_buffer[error_msg_len] = '\0';
  53. }
  54. else if (captures != NULL) {
  55. int i;
  56. int count = 0;
  57. for (i = 0; i < region->num_regs; i++) {
  58. captures[2*count] = region->beg[i];
  59. captures[2*count+1] = region->end[i];
  60. count ++;
  61. }
  62. *numCaptures = count;
  63. }
  64. onig_region_free(region, 1);
  65. #ifdef BENCHMARK_CHELP
  66. gettimeofday(&tim2, NULL);
  67. t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
  68. printf("%ld microseconds elapsed\n", t);
  69. #endif
  70. return ret;
  71. }
  72. int MatchOnigRegex(void *str, int str_length, int offset, int option,
  73. OnigRegex regex) {
  74. int ret = ONIG_MISMATCH;
  75. int error_msg_len = 0;
  76. OnigRegion *region;
  77. #ifdef BENCHMARK_CHELP
  78. struct timeval tim1, tim2;
  79. long t;
  80. #endif
  81. OnigUChar *str_start = (OnigUChar *) str;
  82. OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
  83. OnigUChar *search_start = (OnigUChar *)(str_start + offset);
  84. #ifdef BENCHMARK_CHELP
  85. gettimeofday(&tim1, NULL);
  86. #endif
  87. region = onig_region_new();
  88. ret = onig_match(regex, str_start, str_end, search_start, region, option);
  89. onig_region_free(region, 1);
  90. #ifdef BENCHMARK_CHELP
  91. gettimeofday(&tim2, NULL);
  92. t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
  93. printf("%ld microseconds elapsed\n", t);
  94. #endif
  95. return ret;
  96. }
  97. int LookupOnigCaptureByName(char *name, int name_length,
  98. OnigRegex regex) {
  99. int ret = ONIGERR_UNDEFINED_NAME_REFERENCE;
  100. OnigRegion *region;
  101. #ifdef BENCHMARK_CHELP
  102. struct timeval tim1, tim2;
  103. long t;
  104. #endif
  105. OnigUChar *name_start = (OnigUChar *) name;
  106. OnigUChar *name_end = (OnigUChar *) (name_start + name_length);
  107. #ifdef BENCHMARK_CHELP
  108. gettimeofday(&tim1, NULL);
  109. #endif
  110. region = onig_region_new();
  111. ret = onig_name_to_backref_number(regex, name_start, name_end, region);
  112. onig_region_free(region, 1);
  113. #ifdef BENCHMARK_CHELP
  114. gettimeofday(&tim2, NULL);
  115. t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
  116. printf("%ld microseconds elapsed\n", t);
  117. #endif
  118. return ret;
  119. }
  /*
   * Accumulator handed to name_callback() while iterating over the named
   * groups of a regex.
   */
  typedef struct {
      char *nameBuffer;   /* destination for the ';'-separated group names */
      int   bufferOffset; /* bytes required so far (may exceed bufferSize) */
      int   bufferSize;   /* capacity of nameBuffer in bytes */
      int  *numbers;      /* one group number is stored per visited name */
      int   numIndex;     /* next free slot in numbers */
  } group_info_t;
  127. int name_callback(const UChar* name, const UChar* name_end,
  128. int ngroup_num, int* group_nums,
  129. regex_t* reg, void* arg)
  130. {
  131. int nameLen, offset, newOffset;
  132. group_info_t *groupInfo;
  133. groupInfo = (group_info_t*) arg;
  134. offset = groupInfo->bufferOffset;
  135. nameLen = name_end - name;
  136. newOffset = offset + nameLen;
  137. //if there are already names, add a ";"
  138. if (offset > 0) {
  139. newOffset += 1;
  140. }
  141. if (newOffset <= groupInfo->bufferSize) {
  142. if (offset > 0) {
  143. groupInfo->nameBuffer[offset] = ';';
  144. offset += 1;
  145. }
  146. memcpy(&groupInfo->nameBuffer[offset], name, nameLen);
  147. }
  148. groupInfo->bufferOffset = newOffset;
  149. if (ngroup_num > 0) {
  150. groupInfo->numbers[groupInfo->numIndex] = group_nums[ngroup_num-1];
  151. } else {
  152. groupInfo->numbers[groupInfo->numIndex] = -1;
  153. }
  154. groupInfo->numIndex += 1;
  155. return 0; /* 0: continue */
  156. }
  157. int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbers) {
  158. int ret;
  159. group_info_t groupInfo;
  160. groupInfo.nameBuffer = (char*)buffer;
  161. groupInfo.bufferOffset = 0;
  162. groupInfo.bufferSize = bufferSize;
  163. groupInfo.numbers = groupNumbers;
  164. groupInfo.numIndex = 0;
  165. onig_foreach_name(reg, name_callback, (void* )&groupInfo);
  166. return groupInfo.bufferOffset;
  167. }