You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

kann.h 8.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. /*
  2. The MIT License
  3. Copyright (c) 2018-2019 Dana-Farber Cancer Institute
  4. 2016-2018 Broad Institute
  5. Permission is hereby granted, free of charge, to any person obtaining
  6. a copy of this software and associated documentation files (the
  7. "Software"), to deal in the Software without restriction, including
  8. without limitation the rights to use, copy, modify, merge, publish,
  9. distribute, sublicense, and/or sell copies of the Software, and to
  10. permit persons to whom the Software is furnished to do so, subject to
  11. the following conditions:
  12. The above copyright notice and this permission notice shall be
  13. included in all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  18. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19. ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20. CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. SOFTWARE.
  22. */
  23. #ifndef KANN_H
  24. #define KANN_H
  25. #define KANN_VERSION "r536" /* version string of this header */
  26. #define KANN_F_IN 0x1 /* input */
  27. #define KANN_F_OUT 0x2 /* output */
  28. #define KANN_F_TRUTH 0x4 /* truth output */
  29. #define KANN_F_COST 0x8 /* final cost */
  30. #define KANN_C_CEB 1 /* binary cross-entropy cost, used with sigmoid */
  31. #define KANN_C_CEM 2 /* multi-class cross-entropy cost, used with softmax */
  32. #define KANN_C_CEB_NEG 3 /* binary cross-entropy-like cost, used with tanh */
  33. #define KANN_C_MSE 4 /* mean square error */
  34. #define KANN_RNN_VAR_H0 0x1 /* take the initial hidden values as variables */
  35. #define KANN_RNN_NORM 0x2 /* apply layer normalization */
  36. #include "kautodiff.h"
  37. typedef struct { /* a network: the nodes of a computational graph plus collated value arrays */
  38. int n; /* number of nodes in the computational graph */
  39. kad_node_t **v; /* list of nodes; presumably n entries -- confirm */
  40. float *x, *g, *c; /* collated variable values (x), gradients (g) and constant values (c) */
  41. void *mt; /* auxiliary data for multi-threading; NULL if multi-threading disabled */
  42. } kann_t;
  43. extern int kann_verbose; /* declared here, defined elsewhere; presumably the diagnostic verbosity level -- confirm */
  44. #define kann_size_var(a) kad_size_var((a)->n, (a)->v) /* forwards the network's node list to kad_size_var() */
  45. #define kann_size_const(a) kad_size_const((a)->n, (a)->v) /* forwards the network's node list to kad_size_const() */
  46. #define kann_dim_in(a) kann_feed_dim((a), KANN_F_IN, 0) /* kann_feed_dim() for flag KANN_F_IN, label 0 */
  47. #define kann_dim_out(a) kann_feed_dim((a), KANN_F_TRUTH, 0) /* kann_feed_dim() for flag KANN_F_TRUTH, label 0 (queries the truth node, not KANN_F_OUT) */
  48. #define kann_srand(seed) kad_srand(0, (seed)) /* calls kad_srand() with first argument 0 */
  49. #define kann_drand() kad_drand(0) /* calls kad_drand() with argument 0 */
  50. #define kann_set_batch_size(ann, B) kad_sync_dim((ann)->n, (ann)->v, (B)) /* passes B to kad_sync_dim() over all nodes of ann */
  51. #ifdef __cplusplus
  52. extern "C" {
  53. #endif
  54. /**
  55. * Generate a network from a computational graph
  56. *
  57. * A network must have at least one scalar cost node (i.e. one whose n_d==0).
  58. * It may optionally contain other cost nodes or output nodes that do not lead
  59. * to the primary cost node.
  60. *
  61. * @param cost cost node (must be a scalar, i.e. cost->n_d==0)
  62. * @param n_rest number of other nodes without predecessors
  63. * @param ... the n_rest other nodes (each of type kad_node_t*) without predecessors
  64. *
  65. * @return network on success, or NULL otherwise
  66. */
  67. kann_t *kann_new(kad_node_t *cost, int n_rest, ...);
  68. /**
  69. * Unroll an RNN
  70. *
  71. * @param a network
  72. * @param ... unroll length(s) (originally documented as a single `len`, the number of unrolls).
  73. * NOTE(review): the prototype is variadic; the count and type of the length arguments are not visible here -- confirm.
  74. * @return an unrolled network, or NULL if the network is not an RNN
  75. */
  76. kann_t *kann_unroll(kann_t *a, ...);
  77. kann_t *kann_unroll_array(kann_t *a, int *len); /* presumably kann_unroll() with the length(s) passed via an array -- confirm */
  78. kann_t *kann_clone(kann_t *a, int batch_size); /* NOTE(review): undocumented; presumably copies a network for the given batch size -- confirm */
  79. void kann_delete(kann_t *a); /* delete a network generated by kann_new() or kann_layer_final(); NOTE(review): kann_layer_final() is not declared in this header */
  80. void kann_delete_unrolled(kann_t *a); /* delete a network generated by kann_unroll() */
  81. /**
  82. * Enable/disable multi-threading (requires pthread)
  83. *
  84. * KANN splits a mini-batch into $n_threads mini-mini-batches and runs each of
  85. * them on its own thread. So far, only kann_cost() takes advantage of
  86. * multi-threading.
  87. *
  88. * @param ann network
  89. * @param n_threads number of threads; <=1 to completely disable multi-threading
  90. * @param max_batch_size max mini-batch size; shall be no smaller than n_threads
  91. */
  92. void kann_mt(kann_t *ann, int n_threads, int max_batch_size);
  93. /**
  94. * Bind float arrays to feed nodes
  95. *
  96. * @param a network
  97. * @param ext_flag required external flags
  98. * @param ext_label required external label
  99. * @param x array of float pointers, one per matching feed node
  100. *
  101. * @return number of matching feed nodes
  102. */
  103. int kann_feed_bind(kann_t *a, uint32_t ext_flag, int32_t ext_label, float **x);
  104. /**
  105. * Compute the cost and optionally gradients
  106. *
  107. * @param a network
  108. * @param cost_label required external label
  109. * @param cal_grad whether to compute gradients (an int used as a boolean)
  110. *
  111. * @return cost
  112. */
  113. float kann_cost(kann_t *a, int cost_label, int cal_grad);
  114. int kann_eval(kann_t *a, uint32_t ext_flag, int ext_label); /* NOTE(review): undocumented; takes the same ext_flag/ext_label selectors as kann_find() -- confirm semantics and return value */
  115. int kann_eval_out(kann_t *a); /* NOTE(review): undocumented; presumably kann_eval() restricted to output nodes -- confirm */
  116. int kann_class_error(const kann_t *ann, int *base); /* NOTE(review): undocumented; presumably returns a misclassification count and writes a total to *base -- confirm */
  117. /**
  118. * Find a node by external flags and label
  119. *
  120. * @param a network
  121. * @param ext_flag required external flags; set to 0 to match all flags
  122. * @param ext_label required external label
  123. *
  124. * @return >=0 if exactly one node is found; -1 if not found; -2 if multiple nodes match
  125. */
  126. int kann_find(const kann_t *a, uint32_t ext_flag, int32_t ext_label);
  127. /**
  128. * Get the size of a feed node, assuming a mini-batch size of 1
  129. *
  130. * @param a network
  131. * @param ext_flag required external flags
  132. * @param ext_label required external label
  133. *
  134. * @return size>=0; -1 if not found; -2 if multiple nodes match
  135. */
  136. int kann_feed_dim(const kann_t *a, uint32_t ext_flag, int32_t ext_label);
  137. /**
  138. * Get an RNN ready for continuous feeding
  139. *
  140. * @param a network
  141. */
  142. void kann_rnn_start(kann_t *a);
  143. void kann_rnn_end(kann_t *a); /* NOTE(review): undocumented; presumably ends the mode entered by kann_rnn_start() -- confirm */
  144. /**
  145. * Switch between the training and prediction networks (effective only when there are switch nodes)
  146. *
  147. * @param a network
  148. * @param is_train 0 for the prediction network and non-zero for the training network
  149. */
  150. void kann_switch(kann_t *a, int is_train);
  151. /**
  152. * RMSprop update
  153. *
  154. * @param n number of variables
  155. * @param h0 learning rate
  156. * @param h per-variable learning rate; NULL if not applicable
  157. * @param decay RMSprop decay; use 0.9 if unsure
  158. * @param g gradient, of size n
  159. * @param t variables to change (presumably also of size n -- confirm)
  160. * @param r memory, of size n
  161. */
  162. void kann_RMSprop(int n, float h0, const float *h, float decay, const float *g, float *t, float *r);
  163. void kann_shuffle(int n, int *s); /* NOTE(review): undocumented; presumably fills or permutes the n-element index array s -- confirm */
  164. float kann_grad_clip(float thres, int n, float *g); /* NOTE(review): undocumented; presumably clips the n-element gradient g against thres -- confirm return value */
  165. /* common layers: each constructor returns a kad_node_t*; those with an `in`/`t` parameter take an existing node as input */
  166. kad_node_t *kann_layer_input(int n1);
  167. kad_node_t *kann_layer_dense(kad_node_t *in, int n1);
  168. kad_node_t *kann_layer_dropout(kad_node_t *t, float r); /* r: presumably the dropout rate -- confirm */
  169. kad_node_t *kann_layer_layernorm(kad_node_t *in);
  170. kad_node_t *kann_layer_rnn(kad_node_t *in, int n1, int rnn_flag); /* rnn_flag: presumably a combination of the KANN_RNN_* flags -- confirm */
  171. kad_node_t *kann_layer_lstm(kad_node_t *in, int n1, int rnn_flag);
  172. kad_node_t *kann_layer_gru(kad_node_t *in, int n1, int rnn_flag);
  173. kad_node_t *kann_layer_conv2d(kad_node_t *in, int n_flt, int k_rows, int k_cols, int stride_r, int stride_c, int pad_r, int pad_c);
  174. kad_node_t *kann_layer_conv1d(kad_node_t *in, int n_flt, int k_size, int stride, int pad);
  175. kad_node_t *kann_layer_cost(kad_node_t *t, int n_out, int cost_type); /* cost_type: presumably one of the KANN_C_* constants -- confirm */
  176. kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...); /* flag can be KAD_CONST or KAD_VAR; the variadic arguments presumably give the n_d dimensions -- confirm */
  177. kad_node_t *kann_new_scalar(uint8_t flag, float x);
  178. kad_node_t *kann_new_weight(int n_row, int n_col);
  179. kad_node_t *kann_new_bias(int n);
  180. kad_node_t *kann_new_weight_conv2d(int n_out, int n_in, int k_row, int k_col);
  181. kad_node_t *kann_new_weight_conv1d(int n_out, int n_in, int kernel_len);
  182. kad_node_t *kann_new_leaf_array(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, int32_t d[KAD_MAX_DIM]); /* NOTE(review): the (offset, par) pair is undocumented; presumably a cursor into a caller-supplied node array -- confirm */
  183. kad_node_t *kann_new_leaf2(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, ...);
  184. kad_node_t *kann_layer_dense2(int *offset, kad_node_p *par, kad_node_t *in, int n1);
  185. kad_node_t *kann_layer_dropout2(int *offset, kad_node_p *par, kad_node_t *t, float r);
  186. kad_node_t *kann_layer_layernorm2(int *offset, kad_node_t **par, kad_node_t *in); /* NOTE(review): par is spelled kad_node_t ** from here on but kad_node_p * above; confirm both name the same type */
  187. kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
  188. kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
  189. /* operations on a network with a single input node and a single output node */
  190. typedef void (*kann_train_cb)(int iter, float train_cost, float val_cost, void *ud); /* callback type taken by kann_train_fnn1(); ud is presumably the caller's opaque pointer passed back unchanged -- confirm */
  191. int kann_train_fnn1(kann_t *ann, float lr, int mini_size, int max_epoch,
  192. int max_drop_streak, float frac_val, int n,
  193. float **_x, float **_y, kann_train_cb cb, void *ud); /* NOTE(review): undocumented; presumably n samples in _x/_y with frac_val held out for validation -- confirm */
  194. float kann_cost_fnn1(kann_t *a, int n, float **x, float **y); /* NOTE(review): undocumented; presumably the cost over n (x, y) samples -- confirm */
  195. const float *kann_apply1(kann_t *a, float *x); /* NOTE(review): undocumented; ownership and lifetime of the returned buffer are not visible here -- confirm */
  196. /* model I/O: the *_fp variants take a FILE stream, the others a file name */
  197. void kann_save_fp(FILE *fp, kann_t *ann);
  198. void kann_save(const char *fn, kann_t *ann);
  199. kann_t *kann_load_fp(FILE *fp); /* NOTE(review): the failure return value (presumably NULL) is undocumented -- confirm */
  200. kann_t *kann_load(const char *fn);
  201. #ifdef __cplusplus
  202. }
  203. #endif
  204. #endif