[example] training artificial neural networks

Following Demofox's example, I used CToy to write and test a minimal but generic library to train neural networks with backpropagation. The code is pretty straightforward and educational, but could be optimized for performance.

main.c:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <ctoy.h>
#include "ann_simple.c"

/*
 * Train a fixed 2-2-2 network (2 inputs, 2 hidden nodes, 2 outputs) with
 * plain gradient descent and print the output before and after learning.
 * NOTE(review): the initial weights (0.15, 0.2, ...) and targets
 * (0.01, 0.99) look like Matt Mazur's worked backpropagation example —
 * confirm if exact reproduction of that article is intended.
 */
void ctoy_begin(void)
{
	float gradient[12], value[6], desired_output[2];
	float weight[12] = {
		0.35f, 0.15f, 0.2f, // hidden layer node 0 (bias, weight, weight)
		0.35f, 0.25f, 0.3f, // hidden layer node 1
		0.6f, 0.4f, 0.45f,  // output layer node 2
		0.6f, 0.5f, 0.55f   // output layer node 3
	};
	int layer_count = 3;
	int layer_node_count[3] = {2, 2, 2}; // (input, hidden, output)
	float learning_rate = 0.1f; // float literal (was 0.1, a double)
	int epoch_count = 1000000;
	int i, j;

	// input / desired output
	value[0] = 0.05f;
	value[1] = 0.1f;
	desired_output[0] = 0.01f;
	desired_output[1] = 0.99f;
	printf("desired output = %f %f\n", desired_output[0], desired_output[1]);

	// first run: forward pass with the initial weights
	// (value[4], value[5] are the output-layer activations)
	ann_run(value, weight, NULL, layer_count, layer_node_count);
	printf("initial output = %f %f\n", value[4], value[5]);

	// learning
	for (j = 0; j < epoch_count; j++) {

		// compute gradients from the activations of the last forward pass
		ann_gradients(
			gradient,
			value, desired_output,
			weight, NULL, layer_count, layer_node_count
			);

		// update weights (simple gradient descent step)
		for (i = 0; i < 12; i++)
			weight[i] -= gradient[i] * learning_rate;

		// epoch run: refresh activations for the next gradient computation
		ann_run(value, weight, NULL, layer_count, layer_node_count);
	}

	// learned output
	printf("learned output = %f %f\n\n", value[4], value[5]);
}

// Per-frame callback: nothing to animate, just idle.
void ctoy_main_loop(void)
{
	ctoy_sleep(0, 1000000); // presumably (seconds, nanoseconds), i.e. ~1 ms — confirm ctoy API
}

// Shutdown callback: ctoy_begin uses only stack storage, nothing to release.
void ctoy_end(void)
{}


ann_simple.c:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
// activation functions (argument x: the node's pre-activation sum)
#define ann_sigmoid(x) (1.0f / (1.0f + expf(-(x))))
#define ann_gaussian(x) (expf(-(x) * (x)))
#define ann_sin(x) (sinf(x))
#define ann_cos(x) (cosf(x))
#define ann_linear(x) (x)
// derivatives
// NOTE(review): ann_sigmoid_d is expressed in terms of the *activated
// output* (sigmoid'(s) == y * (1 - y) with y = sigmoid(s)), whereas
// ann_gaussian_d / ann_sin_d / ann_cos_d are expressed in terms of the
// *pre-activation input*. ann_node_gradient passes the activated output
// to all of them, which is exact only for the sigmoid and linear cases —
// confirm before relying on the gaussian/sin/cos node types.
#define ann_sigmoid_d(x) ((x) * (1.0f - (x)))
#define ann_gaussian_d(x) (-2.0f * (x) * ann_gaussian(x))
#define ann_sin_d(x) (cosf(x))
#define ann_cos_d(x) (-sinf(x))
#define ann_linear_d(x) (1.0f)

/* Total number of weights the network needs: every node in layers 1..n-1
   owns one bias plus one weight per node of the preceding layer. */
int ann_weight_count(int layer_count, int *layer_node_count)
{
	int layer, total = 0;

	for (layer = 1; layer < layer_count; layer++)
		total += layer_node_count[layer] * (1 + layer_node_count[layer - 1]);

	return total;
}

/* Total number of nodes across all layers (input layer included). */
int ann_node_count(int layer_count, int *layer_node_count)
{
	int *end = layer_node_count + layer_count;
	int total = 0;

	while (layer_node_count < end)
		total += *layer_node_count++;

	return total;
}

/* Number of computing nodes: all layers except layer 0 (the input layer). */
int ann_node_count_minus_input(int layer_count, int *layer_node_count)
{
	int total = 0;

	while (--layer_count > 0)
		total += layer_node_count[layer_count];

	return total;
}

/*
 * Feed one fully connected layer forward.
 * output       : receives one activation per node (output_count floats)
 * input        : previous layer's activations (input_count floats)
 * weight       : per node, bias first then one weight per input
 * node         : optional activation-type table (NULL = all sigmoid);
 *                0 disables a node, 1..5 select the activation function
 */
void ann_run_layer(float *output, float *input, float *weight, char *node, int output_count, int input_count)
{
	int i, j, w = 0;

	for (i = 0; i < output_count; i++) {

		float sum;
		int type = node ? node[i] : 1; /* no table: behave as plain sigmoid */

		if (type == 0) { /* node is off: emit 0 and skip its weight row */
			output[i] = 0;
			w += input_count + 1;
			continue;
		}

		sum = weight[w++]; /* bias */
		for (j = 0; j < input_count; j++)
			sum += input[j] * weight[w++];

		switch (type) {
			default: /* unknown type falls back to sigmoid, as before */
			case 1: output[i] = ann_sigmoid(sum); break;
			case 2: output[i] = ann_gaussian(sum); break;
			case 3: output[i] = ann_sin(sum); break;
			case 4: output[i] = ann_cos(sum); break;
			case 5: output[i] = ann_linear(sum); break;
		}
	}
}

/*
 * Full forward pass. `value` holds every node activation back to back,
 * layer by layer; the first layer_node_count[0] entries are the inputs
 * and must be set by the caller before the call.
 */
void ann_run(float *value, float *weight, char *node, int layer_count, int *layer_node_count)
{
	float *src = value;                     /* activations feeding the layer */
	float *dst = value + layer_node_count[0]; /* where its outputs are written */
	int src_count = layer_node_count[0];
	int i;

	for (i = 1; i < layer_count; i++) {

		int dst_count = layer_node_count[i];

		ann_run_layer(dst, src, weight, node, dst_count, src_count);

		/* advance to the next layer's weight row / node-type entries */
		weight += (src_count + 1) * dst_count;
		if (node) node += dst_count;

		src = dst;
		dst += dst_count;
		src_count = dst_count;
	}
}

/*
 * Gradient of one node: gradient[0] gets the node error (bias gradient),
 * gradient[1..input_count] gets error * input[i] for each incoming weight.
 * cost is d(error)/d(output) for this node; value is its activation.
 */
void ann_node_gradient(char *node, float *gradient, float *input, int input_count, float value, float cost)
{
	float slope, err;
	int i;

	if (node) {
		switch (node[0]) {
			case 1: slope = ann_sigmoid_d(value); break;
			case 2: slope = ann_gaussian_d(value); break;
			case 3: slope = ann_sin_d(value); break;
			case 4: slope = ann_cos_d(value); break;
			case 5: slope = ann_linear_d(value); break;
			default: slope = 0; break; /* off or unknown type: no gradient */
		}
	}
	else {
		slope = ann_sigmoid_d(value); /* no node table: sigmoid network */
	}

	err = cost * slope;

	gradient[0] = err; /* bias */
	for (i = 0; i < input_count; i++)
		gradient[1 + i] = err * input[i];
}

/*
 * Backpropagation: fill `gradient` (same layout as `weight`: per node,
 * bias slot first, then one slot per input) with d(error)/d(weight) for
 * a squared-error cost, using the activations left in `value` by a
 * previous ann_run(). Processes the output layer first, then sweeps
 * backwards through the hidden layers.
 */
void ann_gradients(float *gradient, float *value, float *desired_output, float *weight, char *node, int layer_count, int *layer_node_count)
{
	float *vp, *gp, *wp, *ip; /* wp is declared but never used */
	char *np = NULL;
	int node_count, input_count;
	int i, j;

	// start from output layer data:
	// skip the input-layer activations, then advance value/weight/gradient
	// (and node) past every hidden layer so they point at the last layer
	value += layer_node_count[0];

	for (i = 1; i < (layer_count - 1); i++) {
		int weight_count = (layer_node_count[i - 1] + 1) * layer_node_count[i];
		value += layer_node_count[i];
		weight += weight_count;
		gradient += weight_count;
		if (node) node += layer_node_count[i];
	}

	// output layer gradient
	node_count = layer_node_count[layer_count - 1];
	input_count = layer_node_count[layer_count - 2];

	vp = value;            // output-layer activations
	gp = gradient;         // this layer's gradient rows
	ip = vp - input_count; // previous layer's activations (this layer's inputs)
	if (node) np = node;

	for (j = 0; j < node_count; j++) {
		// d(0.5 * (out - target)^2) / d(out) = out - target
		float cost = vp[j] - desired_output[j];
		ann_node_gradient(np, gp, ip, input_count, vp[j], cost);
		gp += input_count + 1; // each node's row: 1 bias + input_count weights
		if (node) np++;
	}

	// hidden layers gradient
	// (walking backwards: a hidden node's cost is the weighted sum of the
	// already-computed errors of the nodes it feeds in the next layer)
	for (i = (layer_count - 2); i >= 1; i--) {

		float *next_layer_weight = weight;     // weights of layer i+1
		float *next_layer_gradient = gradient; // its gradients; gradient[0] of
		                                       // each row is that node's error
		int k, weight_count, next_node_count = node_count;

		node_count = input_count;              // layer i's node count
		input_count = layer_node_count[i - 1]; // size of the layer feeding i
		weight_count = (input_count + 1) * node_count;

		// step all running pointers back by one layer
		vp = (value -= node_count);
		gp = (gradient -= weight_count);
		ip = vp - input_count;
		if (node) np = (node -= node_count);

		for (j = 0; j < node_count; j++) {

			float *wn = next_layer_weight;
			float *gn = next_layer_gradient;
			float cost = 0;

			// cost from next nodes errors:
			// gn[0] is the next node's error; wn[1 + j] is its weight from
			// this node j (index 0 is its bias); stride (node_count + 1)
			// jumps to the next node's weight/gradient row
			for (k = 0; k < next_node_count; k++) {
				cost += gn[0] * wn[1 + j];
				wn += node_count + 1;
				gn += node_count + 1;
			}

			ann_node_gradient(np, gp, ip, input_count, vp[j], cost);
			gp += input_count + 1;
			if (node) np++;
		}

		weight -= weight_count;
	}
}


And a similar version, but with dynamic memory allocation and 2 hidden layers:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include <ctoy.h>
#include "ann_simple.c"

/*
 * Train a 2-4-8-2 network with randomly initialized weights and print
 * the output before and after learning. All buffers are heap-allocated
 * and released before returning; allocation failures abort cleanly.
 */
void ctoy_begin(void)
{
	float *weight, *gradient, *value, desired_output[2];
	int layer_count = 4;
	int layer_node_count[4] = {2, 4, 8, 2};
	float learning_rate = 0.1f; // float literal (was 0.1, a double)
	int epoch_count = 100000;
	int node_count = ann_node_count(layer_count, layer_node_count);
	int weight_count = ann_weight_count(layer_count, layer_node_count);
	int i, j;

	// alloc
	weight   = malloc(weight_count * sizeof(float));
	gradient = malloc(weight_count * sizeof(float));
	value    = malloc(node_count * sizeof(float));

	// bail out on allocation failure (free(NULL) below is a no-op)
	if (!weight || !gradient || !value) {
		printf("ctoy_begin: out of memory\n");
		goto clear;
	}

	// weights init: small random values
	for (i = 0; i < weight_count; i++)
		weight[i] = m_randf() * 0.1f;

	// input / desired output
	value[0] = 0.05f;
	value[1] = 0.1f;
	desired_output[0] = 0.01f;
	desired_output[1] = 0.99f;
	printf("desired output = %f %f\n", desired_output[0], desired_output[1]);

	// first run: forward pass with the initial weights
	// (the last two entries of value are the output-layer activations)
	ann_run(value, weight, NULL, layer_count, layer_node_count);
	printf("initial output = %f %f\n", value[node_count - 2], value[node_count - 1]);

	// learning
	for (j = 0; j < epoch_count; j++) {

		// compute gradients from the activations of the last forward pass
		ann_gradients(
			gradient,
			value, desired_output,
			weight, NULL, layer_count, layer_node_count
			);

		// update weights (simple gradient descent step)
		for (i = 0; i < weight_count; i++)
			weight[i] -= gradient[i] * learning_rate;

		// epoch run: refresh activations for the next gradient computation
		ann_run(value, weight, NULL, layer_count, layer_node_count);
	}

	// learned output
	printf("learned output = %f %f\n\n", value[node_count - 2], value[node_count - 1]);

	// clear
clear:
	free(weight);
	free(gradient);
	free(value);
}

// Per-frame callback: nothing to animate, just idle.
void ctoy_main_loop(void)
{
	ctoy_sleep(0, 1000000); // presumably (seconds, nanoseconds), i.e. ~1 ms — confirm ctoy API
}

// Shutdown callback: the buffers are already freed at the end of ctoy_begin.
void ctoy_end(void)
{}

Edited by anaël seghezzi on
I know the thread is old, but I wanted to thank you for CToy.
It reminds me of my first days of programming in 1978 when it was simpler to just do something. I went through this ANN exercise, and made more progress in 2 hours with neural networks than I have with pre-canned libraries (python), and tutorials. Yes, they get you very far in terms of being able to put something together quickly, but I don't own it like I do when I go back to my C roots and raw code. For this I am grateful. I really like Raylib, but I find myself playing with CToy much more! Thanks again!
You are welcome ! I appreciate it, thank you.

I'm happy being back to C too, I use C-Toy almost everyday in my work to prototype.