123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192 |
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #ifdef BENCHMARK_CHELP
- #include <sys/time.h>
- #endif
- #include "chelper.h"
-
- int NewOnigRegex( char *pattern, int pattern_length, int option,
- OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
- int ret = ONIG_NORMAL;
- int error_msg_len = 0;
-
- OnigUChar *pattern_start = (OnigUChar *) pattern;
- OnigUChar *pattern_end = (OnigUChar *) (pattern + pattern_length);
-
- *error_info = (OnigErrorInfo *) malloc(sizeof(OnigErrorInfo));
- memset(*error_info, 0, sizeof(OnigErrorInfo));
-
- onig_initialize_encoding(*encoding);
-
- *error_buffer = (char*) malloc(ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
-
- memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
-
- ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info);
-
- if (ret != ONIG_NORMAL) {
- error_msg_len = onig_error_code_to_str((unsigned char*)(*error_buffer), ret, *error_info);
- if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
- error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
- }
- (*error_buffer)[error_msg_len] = '\0';
- }
- return ret;
- }
-
- int SearchOnigRegex( void *str, int str_length, int offset, int option,
- OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
- int ret = ONIG_MISMATCH;
- int error_msg_len = 0;
- OnigRegion *region;
- #ifdef BENCHMARK_CHELP
- struct timeval tim1, tim2;
- long t;
- #endif
-
- OnigUChar *str_start = (OnigUChar *) str;
- OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
- OnigUChar *search_start = (OnigUChar *)(str_start + offset);
- OnigUChar *search_end = str_end;
-
- #ifdef BENCHMARK_CHELP
- gettimeofday(&tim1, NULL);
- #endif
-
- region = onig_region_new();
-
- ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option);
- if (ret < 0 && error_buffer != NULL) {
- error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info);
- if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
- error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
- }
- error_buffer[error_msg_len] = '\0';
- }
- else if (captures != NULL) {
- int i;
- int count = 0;
- for (i = 0; i < region->num_regs; i++) {
- captures[2*count] = region->beg[i];
- captures[2*count+1] = region->end[i];
- count ++;
- }
- *numCaptures = count;
- }
-
- onig_region_free(region, 1);
-
- #ifdef BENCHMARK_CHELP
- gettimeofday(&tim2, NULL);
- t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
- printf("%ld microseconds elapsed\n", t);
- #endif
- return ret;
- }
-
- int MatchOnigRegex(void *str, int str_length, int offset, int option,
- OnigRegex regex) {
- int ret = ONIG_MISMATCH;
- int error_msg_len = 0;
- OnigRegion *region;
- #ifdef BENCHMARK_CHELP
- struct timeval tim1, tim2;
- long t;
- #endif
-
- OnigUChar *str_start = (OnigUChar *) str;
- OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
- OnigUChar *search_start = (OnigUChar *)(str_start + offset);
-
- #ifdef BENCHMARK_CHELP
- gettimeofday(&tim1, NULL);
- #endif
- region = onig_region_new();
- ret = onig_match(regex, str_start, str_end, search_start, region, option);
- onig_region_free(region, 1);
- #ifdef BENCHMARK_CHELP
- gettimeofday(&tim2, NULL);
- t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
- printf("%ld microseconds elapsed\n", t);
- #endif
- return ret;
- }
-
- int LookupOnigCaptureByName(char *name, int name_length,
- OnigRegex regex) {
- int ret = ONIGERR_UNDEFINED_NAME_REFERENCE;
- OnigRegion *region;
- #ifdef BENCHMARK_CHELP
- struct timeval tim1, tim2;
- long t;
- #endif
- OnigUChar *name_start = (OnigUChar *) name;
- OnigUChar *name_end = (OnigUChar *) (name_start + name_length);
- #ifdef BENCHMARK_CHELP
- gettimeofday(&tim1, NULL);
- #endif
- region = onig_region_new();
- ret = onig_name_to_backref_number(regex, name_start, name_end, region);
- onig_region_free(region, 1);
- #ifdef BENCHMARK_CHELP
- gettimeofday(&tim2, NULL);
- t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
- printf("%ld microseconds elapsed\n", t);
- #endif
- return ret;
- }
-
/*
 * Accumulator handed to name_callback through the opaque `arg` pointer while
 * the named groups of a regex are enumerated.
 */
typedef struct {
    /* Destination for the group names, joined with ';'. */
    char *nameBuffer;
    /* Bytes the joined names occupy so far; keeps growing even once the
       buffer is too small to hold them. */
    int   bufferOffset;
    /* Capacity of nameBuffer in bytes. */
    int   bufferSize;
    /* Receives one group number per visited name. */
    int  *numbers;
    /* Next free slot in numbers. */
    int   numIndex;
} group_info_t;
-
- int name_callback(const UChar* name, const UChar* name_end,
- int ngroup_num, int* group_nums,
- regex_t* reg, void* arg)
- {
- int nameLen, offset, newOffset;
- group_info_t *groupInfo;
-
- groupInfo = (group_info_t*) arg;
- offset = groupInfo->bufferOffset;
- nameLen = name_end - name;
- newOffset = offset + nameLen;
-
- //if there are already names, add a ";"
- if (offset > 0) {
- newOffset += 1;
- }
-
- if (newOffset <= groupInfo->bufferSize) {
- if (offset > 0) {
- groupInfo->nameBuffer[offset] = ';';
- offset += 1;
- }
- memcpy(&groupInfo->nameBuffer[offset], name, nameLen);
- }
- groupInfo->bufferOffset = newOffset;
- if (ngroup_num > 0) {
- groupInfo->numbers[groupInfo->numIndex] = group_nums[ngroup_num-1];
- } else {
- groupInfo->numbers[groupInfo->numIndex] = -1;
- }
- groupInfo->numIndex += 1;
- return 0; /* 0: continue */
- }
-
- int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbers) {
- int ret;
- group_info_t groupInfo;
- groupInfo.nameBuffer = (char*)buffer;
- groupInfo.bufferOffset = 0;
- groupInfo.bufferSize = bufferSize;
- groupInfo.numbers = groupNumbers;
- groupInfo.numIndex = 0;
- onig_foreach_name(reg, name_callback, (void* )&groupInfo);
- return groupInfo.bufferOffset;
- }
|