main.c:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | #include <ctoy.h> #include "ann_simple.c" void ctoy_begin(void) { float gradient[12], value[6], desired_output[2]; float weight[12] = { 0.35f, 0.15f, 0.2f, // hidden layer node 0 (bias, weight, weight) 0.35f, 0.25f, 0.3f, // hidden layer node 1 0.6f, 0.4f, 0.45f, // output layer node 2 0.6f, 0.5f, 0.55f // output layer node 3 }; int layer_count = 3; int layer_node_count[3] = {2, 2, 2}; // (input, hidden, output) float learning_rate = 0.1; int epoch_count = 1000000; int i, j; // input / desired output value[0] = 0.05f; value[1] = 0.1f; desired_output[0] = 0.01f; desired_output[1] = 0.99f; printf("desired output = %f %f\n", desired_output[0], desired_output[1]); // first run ann_run(value, weight, NULL, layer_count, layer_node_count); printf("initial output = %f %f\n", value[4], value[5]); // learning for (j = 0; j < epoch_count; j++) { // compute gradients ann_gradients( gradient, value, desired_output, weight, NULL, layer_count, layer_node_count ); // update weights for (i = 0; i < 12; i++) weight[i] -= gradient[i] * learning_rate; // epoch run ann_run(value, weight, NULL, layer_count, layer_node_count); } // learned output printf("learned output = %f %f\n\n", value[4], value[5]); } void ctoy_main_loop(void) { ctoy_sleep(0, 1000000); } void ctoy_end(void) {} |
ann_simple.c:
/* Minimal fully-connected feed-forward neural network.
 *
 * Storage conventions (established by ann_run / ann_gradients below):
 *  - weight: per non-input node, (bias, w_0, ..., w_{n-1}) packed layer by
 *    layer, so each node consumes (input_count + 1) floats.
 *  - value: activations for ALL layers back to back, inputs first.
 *  - node (optional, may be NULL): one char per non-input node selecting the
 *    activation function (see switch below); 0 disables the node. When NULL,
 *    every node uses the sigmoid.
 */

// activation functions
#define ann_sigmoid(x) (1.0f / (1.0f + expf(-(x))))
#define ann_gaussian(x) (expf(-(x) * (x)))
#define ann_sin(x) (sinf(x))
#define ann_cos(x) (cosf(x))
#define ann_linear(x) (x)

// derivatives
// NOTE(review): ann_node_gradient feeds these the node's OUTPUT value.
// That is exact for ann_sigmoid_d, which is written in terms of the sigmoid's
// output x*(1-x); but the gaussian/sin/cos derivatives are defined on the
// pre-activation input, which is not stored anywhere — so gradients for
// activations 2-4 look approximate. TODO confirm against upstream intent.
#define ann_sigmoid_d(x) ((x) * (1.0f - (x)))
#define ann_gaussian_d(x) (-2.0f * (x) * ann_gaussian(x))
#define ann_sin_d(x) (cosf(x))
#define ann_cos_d(x) (-sinf(x))
#define ann_linear_d(x) (1.0f)

/* Total number of floats in the weight array: each non-input layer needs
 * (previous layer size + 1 bias) weights per node. */
int ann_weight_count(int layer_count, int *layer_node_count)
{
   int i, s = 0;
   for (i = 1; i < layer_count; i++)
      s += (layer_node_count[i - 1] + 1) * layer_node_count[i];
   return s;
}

/* Total number of nodes across all layers (size of the value array). */
int ann_node_count(int layer_count, int *layer_node_count)
{
   int i, s = 0;
   for (i = 0; i < layer_count; i++)
      s += layer_node_count[i];
   return s;
}

/* Number of non-input nodes (size of the optional node-activation array). */
int ann_node_count_minus_input(int layer_count, int *layer_node_count)
{
   int i, s = 0;
   for (i = 1; i < layer_count; i++)
      s += layer_node_count[i];
   return s;
}

/* Forward pass for one layer: for each of output_count nodes, compute
 * bias + dot(input, weights) and apply the node's activation.
 * weight points at this layer's first (bias, weights...) group. */
void ann_run_layer(float *output, float *input, float *weight, char *node, int output_count, int input_count)
{
   int i, j;
   if (node) {
      for (i = 0; i < output_count; i++) {
         float out;
         if (node[i] == 0) { // node is off
            output[i] = 0;
            weight += input_count + 1; // still skip this node's weight slots
            continue;
         }
         out = weight[0]; // bias
         weight++;
         for (j = 0; j < input_count; j++) {
            out += input[j] * weight[0];
            weight++;
         }
         // per-node activation selection; unknown codes fall back to sigmoid
         switch (node[i]) {
         default:
         case 1: output[i] = ann_sigmoid(out); break;
         case 2: output[i] = ann_gaussian(out); break;
         case 3: output[i] = ann_sin(out); break;
         case 4: output[i] = ann_cos(out); break;
         case 5: output[i] = ann_linear(out); break;
         }
      }
   }
   else {
      // no node array: every node is a sigmoid
      for (i = 0; i < output_count; i++) {
         float out = weight[0]; // bias
         weight++;
         for (j = 0; j < input_count; j++) {
            out += input[j] * weight[0];
            weight++;
         }
         output[i] = ann_sigmoid(out);
      }
   }
}

/* Full forward pass. value[0 .. layer_node_count[0]-1] must hold the inputs;
 * each layer writes its activations immediately after the previous layer's,
 * so the final layer's outputs are the last floats of value. */
void ann_run(float *value, float *weight, char *node, int layer_count, int *layer_node_count)
{
   float *valuep, *layer_input;
   int i, layer_input_count;

   layer_input = value;
   layer_input_count = layer_node_count[0];
   valuep = value + layer_input_count; // first non-input node's slot

   for (i = 1; i < layer_count; i++) {
      ann_run_layer(valuep, layer_input, weight, node, layer_node_count[i], layer_input_count);
      layer_input = valuep;                                  // this layer feeds the next
      valuep += layer_node_count[i];
      weight += (layer_input_count + 1) * layer_node_count[i]; // advance past this layer's weights
      layer_input_count = layer_node_count[i];
      if (node) node += layer_node_count[i]; // node array covers non-input nodes only
   }
}

/* Gradient of one node's weights. value is the node's activation (output),
 * cost is dE/d(output) accumulated by the caller. Writes input_count + 1
 * floats: gradient[0] is the bias gradient (= err, since the bias "input"
 * is 1), then one gradient per input weight. */
void ann_node_gradient(char *node, float *gradient, float *input, int input_count, float value, float cost)
{
   float delta, err;
   int i;
   if (node) {
      // activation derivative matching ann_run_layer's switch
      switch (node[0]) {
      default:
      case 0: delta = 0; break; // node is off
      case 1: delta = ann_sigmoid_d(value); break;
      case 2: delta = ann_gaussian_d(value); break;
      case 3: delta = ann_sin_d(value); break;
      case 4: delta = ann_cos_d(value); break;
      case 5: delta = ann_linear_d(value); break;
      }
   }
   else {
      delta = ann_sigmoid_d(value);
   }
   err = cost * delta;
   gradient[0] = err; // bias
   for (i = 0; i < input_count; i++)
      gradient[i + 1] = err * input[i];
}

/* Backpropagation: fills gradient (same layout/size as weight) with
 * dE/dw for the squared error against desired_output. ann_run must have
 * been called first so value holds current activations. Walks the layers
 * backwards, reusing the just-computed gradients (gn[0] is each next-layer
 * node's err term) to accumulate the cost for the layer below. */
void ann_gradients(float *gradient, float *value, float *desired_output, float *weight, char *node, int layer_count, int *layer_node_count)
{
   float *vp, *gp, *wp, *ip; // NOTE(review): wp is declared but never used
   char *np = NULL;
   int node_count, input_count;
   int i, j;

   // start from output layer data: advance value/weight/gradient/node
   // pointers past all hidden layers up to the last one
   value += layer_node_count[0];
   for (i = 1; i < (layer_count - 1); i++) {
      int weight_count = (layer_node_count[i - 1] + 1) * layer_node_count[i];
      value += layer_node_count[i];
      weight += weight_count;
      gradient += weight_count;
      if (node) node += layer_node_count[i];
   }

   // output layer gradient: cost is d(1/2 * (out - desired)^2)/d(out)
   node_count = layer_node_count[layer_count - 1];
   input_count = layer_node_count[layer_count - 2];
   vp = value;
   gp = gradient;
   ip = vp - input_count; // previous layer's activations
   if (node) np = node;
   for (j = 0; j < node_count; j++) {
      float cost = vp[j] - desired_output[j];
      ann_node_gradient(np, gp, ip, input_count, vp[j], cost);
      gp += input_count + 1; // next node's gradient slots
      if (node) np++;
   }

   // hidden layers gradient, walking backwards; weight/gradient still point
   // at the layer ABOVE the one being processed
   for (i = (layer_count - 2); i >= 1; i--) {
      float *next_layer_weight = weight;
      float *next_layer_gradient = gradient;
      int k, weight_count, next_node_count = node_count;
      node_count = input_count;
      input_count = layer_node_count[i - 1];
      weight_count = (input_count + 1) * node_count;
      vp = (value -= node_count);       // step back to this layer's activations
      gp = (gradient -= weight_count);  // and its gradient slots
      ip = vp - input_count;
      if (node) np = (node -= node_count);
      for (j = 0; j < node_count; j++) {
         float *wn = next_layer_weight;
         float *gn = next_layer_gradient;
         float cost = 0;
         // cost from next nodes errors: gn[0] is node k's err (bias gradient),
         // wn[1 + j] is its weight from this node j; stride node_count + 1
         // skips each next-layer node's (bias, weights...) group
         for (k = 0; k < next_node_count; k++) {
            cost += gn[0] * wn[1 + j];
            wn += node_count + 1;
            gn += node_count + 1;
         }
         ann_node_gradient(np, gp, ip, input_count, vp[j], cost);
         gp += input_count + 1;
         if (node) np++;
      }
      weight -= weight_count; // step weight pointer back for the next iteration
   }
}