Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. /**
  2. * @file lib.c
  3. * @brief StringZilla C library with dynamic backend dispatch for the most appropriate implementation.
  4. * @author Ash Vardanian
  5. * @date January 16, 2024
  6. * @copyright Copyright (c) 2024
  7. */
  8. #if defined(_WIN32) || defined(__CYGWIN__)
  9. #include <windows.h> // `DllMain`
  10. #endif
  11. // Overwrite `SZ_DYNAMIC_DISPATCH` before including StringZilla.
  12. #ifdef SZ_DYNAMIC_DISPATCH
  13. #undef SZ_DYNAMIC_DISPATCH
  14. #endif
  15. #define SZ_DYNAMIC_DISPATCH 1
  16. #include <stringzilla/stringzilla.h>
  #if SZ_AVOID_LIBC
  /*
   *  Build without LibC: the `malloc` definition in `stringzilla.h` would be ill-formed
   *  without a `size_t` type, so declare the type and minimal stand-ins right here.
   */
  #if defined(_MSC_VER)
  typedef sz_size_t size_t; // MSVC: reuse the type definition already inferred by `stringzilla.h`
  #else
  typedef __SIZE_TYPE__ size_t; // GCC/Clang: use the compiler-provided size type
  #endif

  /** @brief Stand-in for `rand`: always returns zero. */
  int rand(void) { return 0; }

  /** @brief Stand-in for `free`: ignores its argument and does nothing. */
  void free(void *start) { sz_unused(start); }

  /** @brief Stand-in for `malloc`: ignores the requested length and returns a null pointer. */
  void *malloc(size_t length) {
      sz_unused(length);
      return SZ_NULL;
  }
  #endif // SZ_AVOID_LIBC
  31. SZ_DYNAMIC sz_capability_t sz_capabilities(void) {
  32. #if SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2
  33. /// The states of 4 registers populated for a specific "cpuid" assembly call
  34. union four_registers_t {
  35. int array[4];
  36. struct separate_t {
  37. unsigned eax, ebx, ecx, edx;
  38. } named;
  39. } info1, info7;
  40. #ifdef _MSC_VER
  41. __cpuidex(info1.array, 1, 0);
  42. __cpuidex(info7.array, 7, 0);
  43. #else
  44. __asm__ __volatile__("cpuid"
  45. : "=a"(info1.named.eax), "=b"(info1.named.ebx), "=c"(info1.named.ecx), "=d"(info1.named.edx)
  46. : "a"(1), "c"(0));
  47. __asm__ __volatile__("cpuid"
  48. : "=a"(info7.named.eax), "=b"(info7.named.ebx), "=c"(info7.named.ecx), "=d"(info7.named.edx)
  49. : "a"(7), "c"(0));
  50. #endif
  51. // Check for AVX2 (Function ID 7, EBX register)
  52. // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L148
  53. unsigned supports_avx2 = (info7.named.ebx & 0x00000020) != 0;
  54. // Check for AVX512F (Function ID 7, EBX register)
  55. // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L155
  56. unsigned supports_avx512f = (info7.named.ebx & 0x00010000) != 0;
  57. // Check for AVX512BW (Function ID 7, EBX register)
  58. // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L166
  59. unsigned supports_avx512bw = (info7.named.ebx & 0x40000000) != 0;
  60. // Check for AVX512VL (Function ID 7, EBX register)
  61. // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L167C25-L167C35
  62. unsigned supports_avx512vl = (info7.named.ebx & 0x80000000) != 0;
  63. // Check for GFNI (Function ID 1, ECX register)
  64. // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L171C30-L171C40
  65. unsigned supports_avx512vbmi = (info1.named.ecx & 0x00000002) != 0;
  66. // Check for GFNI (Function ID 1, ECX register)
  67. // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L177C30-L177C40
  68. unsigned supports_gfni = (info1.named.ecx & 0x00000100) != 0;
  69. return (sz_capability_t)( //
  70. (sz_cap_x86_avx2_k * supports_avx2) | //
  71. (sz_cap_x86_avx512f_k * supports_avx512f) | //
  72. (sz_cap_x86_avx512vl_k * supports_avx512vl) | //
  73. (sz_cap_x86_avx512bw_k * supports_avx512bw) | //
  74. (sz_cap_x86_avx512vbmi_k * supports_avx512vbmi) | //
  75. (sz_cap_x86_gfni_k * (supports_gfni)) | //
  76. (sz_cap_serial_k));
  77. #endif // SIMSIMD_TARGET_X86
  78. #if SZ_USE_ARM_NEON || SZ_USE_ARM_SVE
  79. // Every 64-bit Arm CPU supports NEON
  80. unsigned supports_neon = 1;
  81. unsigned supports_sve = 0;
  82. unsigned supports_sve2 = 0;
  83. sz_unused(supports_sve);
  84. sz_unused(supports_sve2);
  85. return (sz_capability_t)( //
  86. (sz_cap_arm_neon_k * supports_neon) | //
  87. (sz_cap_serial_k));
  88. #endif // SIMSIMD_TARGET_ARM
  89. return sz_cap_serial_k;
  90. }
  91. typedef struct sz_implementations_t {
  92. sz_equal_t equal;
  93. sz_order_t order;
  94. sz_move_t copy;
  95. sz_move_t move;
  96. sz_fill_t fill;
  97. sz_find_byte_t find_byte;
  98. sz_find_byte_t rfind_byte;
  99. sz_find_t find;
  100. sz_find_t rfind;
  101. sz_find_set_t find_from_set;
  102. sz_find_set_t rfind_from_set;
  103. sz_edit_distance_t edit_distance;
  104. sz_alignment_score_t alignment_score;
  105. sz_hashes_t hashes;
  106. } sz_implementations_t;
  107. static sz_implementations_t sz_dispatch_table;
  108. /**
  109. * @brief Initializes a global static "virtual table" of supported backends
  110. * Run it just once to avoiding unnecessary `if`-s.
  111. */
  112. static void sz_dispatch_table_init(void) {
  113. sz_implementations_t *impl = &sz_dispatch_table;
  114. sz_capability_t caps = sz_capabilities();
  115. sz_unused(caps); //< Unused when compiling on pre-SIMD machines.
  116. impl->equal = sz_equal_serial;
  117. impl->order = sz_order_serial;
  118. impl->copy = sz_copy_serial;
  119. impl->move = sz_move_serial;
  120. impl->fill = sz_fill_serial;
  121. impl->find = sz_find_serial;
  122. impl->rfind = sz_rfind_serial;
  123. impl->find_byte = sz_find_byte_serial;
  124. impl->rfind_byte = sz_rfind_byte_serial;
  125. impl->find_from_set = sz_find_charset_serial;
  126. impl->rfind_from_set = sz_rfind_charset_serial;
  127. impl->edit_distance = sz_edit_distance_serial;
  128. impl->alignment_score = sz_alignment_score_serial;
  129. impl->hashes = sz_hashes_serial;
  130. #if SZ_USE_X86_AVX2
  131. if (caps & sz_cap_x86_avx2_k) {
  132. impl->copy = sz_copy_avx2;
  133. impl->move = sz_move_avx2;
  134. impl->fill = sz_fill_avx2;
  135. impl->find_byte = sz_find_byte_avx2;
  136. impl->rfind_byte = sz_rfind_byte_avx2;
  137. impl->find = sz_find_avx2;
  138. impl->rfind = sz_rfind_avx2;
  139. }
  140. #endif
  141. #if SZ_USE_X86_AVX512
  142. if (caps & sz_cap_x86_avx512f_k) {
  143. impl->equal = sz_equal_avx512;
  144. impl->order = sz_order_avx512;
  145. impl->copy = sz_copy_avx512;
  146. impl->move = sz_move_avx512;
  147. impl->fill = sz_fill_avx512;
  148. impl->find = sz_find_avx512;
  149. impl->rfind = sz_rfind_avx512;
  150. impl->find_byte = sz_find_byte_avx512;
  151. impl->rfind_byte = sz_rfind_byte_avx512;
  152. impl->edit_distance = sz_edit_distance_avx512;
  153. }
  154. if ((caps & sz_cap_x86_avx512f_k) && (caps & sz_cap_x86_avx512vl_k) && (caps & sz_cap_x86_gfni_k) &&
  155. (caps & sz_cap_x86_avx512bw_k) && (caps & sz_cap_x86_avx512vbmi_k)) {
  156. impl->find_from_set = sz_find_charset_avx512;
  157. impl->rfind_from_set = sz_rfind_charset_avx512;
  158. impl->alignment_score = sz_alignment_score_avx512;
  159. }
  160. #endif
  161. #if SZ_USE_ARM_NEON
  162. if (caps & sz_cap_arm_neon_k) {
  163. impl->find = sz_find_neon;
  164. impl->rfind = sz_rfind_neon;
  165. impl->find_byte = sz_find_byte_neon;
  166. impl->rfind_byte = sz_rfind_byte_neon;
  167. impl->find_from_set = sz_find_charset_neon;
  168. impl->rfind_from_set = sz_rfind_charset_neon;
  169. }
  170. #endif
  171. }
  #if defined(_MSC_VER)
  /**
   *  @brief  DLL entry point for MSVC builds. Fills the dispatch table when the library
   *          is attached to a process; every other notification requires no work.
   *
   *  @param hinstDLL    Unused.
   *  @param fdwReason   Reason code for the call; only `DLL_PROCESS_ATTACH` triggers work.
   *  @param lpReserved  Unused.
   *  @return Always `TRUE`, on every path, including reason codes not listed explicitly.
   */
  BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) {
      sz_unused(hinstDLL);
      sz_unused(lpReserved);
      if (fdwReason == DLL_PROCESS_ATTACH) sz_dispatch_table_init();
      return TRUE;
  }
  #else
  /** @brief Load-time constructor for GCC/Clang builds: fills the dispatch table. */
  __attribute__((constructor)) static void sz_dispatch_table_init_on_gcc_or_clang(void) { sz_dispatch_table_init(); }
  #endif
  184. SZ_DYNAMIC sz_bool_t sz_equal(sz_cptr_t a, sz_cptr_t b, sz_size_t length) {
  185. return sz_dispatch_table.equal(a, b, length);
  186. }
  187. SZ_DYNAMIC sz_ordering_t sz_order(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length) {
  188. return sz_dispatch_table.order(a, a_length, b, b_length);
  189. }
  190. SZ_DYNAMIC void sz_copy(sz_ptr_t target, sz_cptr_t source, sz_size_t length) {
  191. sz_dispatch_table.copy(target, source, length);
  192. }
  193. SZ_DYNAMIC void sz_move(sz_ptr_t target, sz_cptr_t source, sz_size_t length) {
  194. sz_dispatch_table.move(target, source, length);
  195. }
  196. SZ_DYNAMIC void sz_fill(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
  197. sz_dispatch_table.fill(target, length, value);
  198. }
  199. SZ_DYNAMIC sz_cptr_t sz_find_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) {
  200. return sz_dispatch_table.find_byte(haystack, h_length, needle);
  201. }
  202. SZ_DYNAMIC sz_cptr_t sz_rfind_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) {
  203. return sz_dispatch_table.rfind_byte(haystack, h_length, needle);
  204. }
  205. SZ_DYNAMIC sz_cptr_t sz_find(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length) {
  206. return sz_dispatch_table.find(haystack, h_length, needle, n_length);
  207. }
  208. SZ_DYNAMIC sz_cptr_t sz_rfind(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length) {
  209. return sz_dispatch_table.rfind(haystack, h_length, needle, n_length);
  210. }
  211. SZ_DYNAMIC sz_cptr_t sz_find_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) {
  212. return sz_dispatch_table.find_from_set(text, length, set);
  213. }
  214. SZ_DYNAMIC sz_cptr_t sz_rfind_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) {
  215. return sz_dispatch_table.rfind_from_set(text, length, set);
  216. }
  217. SZ_DYNAMIC sz_size_t sz_hamming_distance( //
  218. sz_cptr_t a, sz_size_t a_length, //
  219. sz_cptr_t b, sz_size_t b_length, //
  220. sz_size_t bound) {
  221. return sz_hamming_distance_serial(a, a_length, b, b_length, bound);
  222. }
  223. SZ_DYNAMIC sz_size_t sz_hamming_distance_utf8( //
  224. sz_cptr_t a, sz_size_t a_length, //
  225. sz_cptr_t b, sz_size_t b_length, //
  226. sz_size_t bound) {
  227. return sz_hamming_distance_utf8_serial(a, a_length, b, b_length, bound);
  228. }
  229. SZ_DYNAMIC sz_size_t sz_edit_distance( //
  230. sz_cptr_t a, sz_size_t a_length, //
  231. sz_cptr_t b, sz_size_t b_length, //
  232. sz_size_t bound, sz_memory_allocator_t *alloc) {
  233. return sz_dispatch_table.edit_distance(a, a_length, b, b_length, bound, alloc);
  234. }
  235. SZ_DYNAMIC sz_size_t sz_edit_distance_utf8( //
  236. sz_cptr_t a, sz_size_t a_length, //
  237. sz_cptr_t b, sz_size_t b_length, //
  238. sz_size_t bound, sz_memory_allocator_t *alloc) {
  239. return _sz_edit_distance_wagner_fisher_serial(a, a_length, b, b_length, bound, sz_true_k, alloc);
  240. }
  241. SZ_DYNAMIC sz_ssize_t sz_alignment_score(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length,
  242. sz_error_cost_t const *subs, sz_error_cost_t gap,
  243. sz_memory_allocator_t *alloc) {
  244. return sz_dispatch_table.alignment_score(a, a_length, b, b_length, subs, gap, alloc);
  245. }
  246. SZ_DYNAMIC void sz_hashes(sz_cptr_t text, sz_size_t length, sz_size_t window_length, sz_size_t step, //
  247. sz_hash_callback_t callback, void *callback_handle) {
  248. sz_dispatch_table.hashes(text, length, window_length, step, callback, callback_handle);
  249. }
  250. SZ_DYNAMIC sz_cptr_t sz_find_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
  251. sz_charset_t set;
  252. sz_charset_init(&set);
  253. for (; n_length; ++n, --n_length) sz_charset_add(&set, *n);
  254. return sz_find_charset(h, h_length, &set);
  255. }
  256. SZ_DYNAMIC sz_cptr_t sz_find_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
  257. sz_charset_t set;
  258. sz_charset_init(&set);
  259. for (; n_length; ++n, --n_length) sz_charset_add(&set, *n);
  260. sz_charset_invert(&set);
  261. return sz_find_charset(h, h_length, &set);
  262. }
  263. SZ_DYNAMIC sz_cptr_t sz_rfind_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
  264. sz_charset_t set;
  265. sz_charset_init(&set);
  266. for (; n_length; ++n, --n_length) sz_charset_add(&set, *n);
  267. return sz_rfind_charset(h, h_length, &set);
  268. }
  269. SZ_DYNAMIC sz_cptr_t sz_rfind_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
  270. sz_charset_t set;
  271. sz_charset_init(&set);
  272. for (; n_length; ++n, --n_length) sz_charset_add(&set, *n);
  273. sz_charset_invert(&set);
  274. return sz_rfind_charset(h, h_length, &set);
  275. }
  276. sz_u64_t _sz_random_generator(void *empty_state) {
  277. sz_unused(empty_state);
  278. return (sz_u64_t)rand();
  279. }
  280. SZ_DYNAMIC void sz_generate(sz_cptr_t alphabet, sz_size_t alphabet_size, sz_ptr_t result, sz_size_t result_length,
  281. sz_random_generator_t generator, void *generator_user_data) {
  282. if (!generator) generator = _sz_random_generator;
  283. sz_generate_serial(alphabet, alphabet_size, result, result_length, generator, generator_user_data);
  284. }