[example] training artificial neural networks

anaël seghezzi

#11635

April 8, 2017

Following Demofox example, I used Ctoy to write and test a minimal but generic library to train neural networks with backpropagation. The code is pretty straightforward and educative, but could be optimized for performance.

main.c:

#include <ctoy.h>
#include "ann_simple.c"

void ctoy_begin(void)
{
	float gradient[12], value[6], desired_output[2];
	float weight[12] = {
		0.35f, 0.15f, 0.2f, // hidden layer node 0 (bias, weight, weight)
		0.35f, 0.25f, 0.3f, // hidden layer node 1
		0.6f, 0.4f, 0.45f,  // output layer node 2
		0.6f, 0.5f, 0.55f   // output layer node 3
	};
	int layer_count = 3;
	int layer_node_count[3] = {2, 2, 2}; // (input, hidden, output)
	float learning_rate = 0.1;
	int epoch_count = 1000000;
	int i, j;

	// input / desired output
	value[0] = 0.05f;
	value[1] = 0.1f;
	desired_output[0] = 0.01f;
	desired_output[1] = 0.99f;
	printf("desired output = %f %f\n", desired_output[0], desired_output[1]);

	// first run
	ann_run(value, weight, NULL, layer_count, layer_node_count);
	printf("initial output = %f %f\n", value[4], value[5]);

	// learning
	for (j = 0; j < epoch_count; j++) {

		// compute gradients
		ann_gradients(
			gradient,
			value, desired_output,
			weight, NULL, layer_count, layer_node_count
			);

		// update weights
		for (i = 0; i < 12; i++)
			weight[i] -= gradient[i] * learning_rate;

		// epoch run
		ann_run(value, weight, NULL, layer_count, layer_node_count);
	}

	// learned output
	printf("learned output = %f %f\n\n", value[4], value[5]);
}

void ctoy_main_loop(void)
{
	ctoy_sleep(0, 1000000);
}

void ctoy_end(void)
{}

ann_simple.c:

// activation functions
#define ann_sigmoid(x) (1.0f / (1.0f + expf(-(x))))
#define ann_gaussian(x) (expf(-(x) * (x)))
#define ann_sin(x) (sinf(x))
#define ann_cos(x) (cosf(x))
#define ann_linear(x) (x)
// derivatives
#define ann_sigmoid_d(x) ((x) * (1.0f - (x)))
#define ann_gaussian_d(x) (-2.0f * (x) * ann_gaussian(x))
#define ann_sin_d(x) (cosf(x))
#define ann_cos_d(x) (-sinf(x))
#define ann_linear_d(x) (1.0f)

int ann_weight_count(int layer_count, int *layer_node_count)
{
	int i, s = 0;
	for (i = 1; i < layer_count; i++)
		s += (layer_node_count[i - 1] + 1) * layer_node_count[i];
	return s;
}

int ann_node_count(int layer_count, int *layer_node_count)
{
	int i, s = 0;
	for (i = 0; i < layer_count; i++)
		s += layer_node_count[i];
	return s;
}

int ann_node_count_minus_input(int layer_count, int *layer_node_count)
{
	int i, s = 0;
	for (i = 1; i < layer_count; i++)
		s += layer_node_count[i];
	return s;
}

void ann_run_layer(float *output, float *input, float *weight, char *node, int output_count, int input_count)
{
	int i, j;

	if (node) {
		for (i = 0; i < output_count; i++) {

			float out;

			if (node[i] == 0) { // node is off
				output[i] = 0;
				weight += input_count + 1;
				continue;
			}

			out = weight[0]; // bias
			weight++;

			for (j = 0; j < input_count; j++) {
				out += input[j] * weight[0];
				weight++; 
			}

			switch (node[i]) {
				default:
				case 1: output[i] = ann_sigmoid(out); break;
				case 2: output[i] = ann_gaussian(out); break;
				case 3: output[i] = ann_sin(out); break;
				case 4: output[i] = ann_cos(out); break;
				case 5: output[i] = ann_linear(out); break;
			}
		}
	}
	else {
		for (i = 0; i < output_count; i++) {

			float out = weight[0]; // bias
			weight++;

			for (j = 0; j < input_count; j++) {
				out += input[j] * weight[0];
				weight++; 
			}

			output[i] = ann_sigmoid(out);
		}
	}
}

void ann_run(float *value, float *weight, char *node, int layer_count, int *layer_node_count)
{
	float *valuep, *layer_input;
	int i, layer_input_count;

	layer_input = value;
	layer_input_count = layer_node_count[0];
	valuep = value + layer_input_count;

	for (i = 1; i < layer_count; i++) {

		ann_run_layer(valuep, layer_input, weight, node, layer_node_count[i], layer_input_count);
		
		layer_input = valuep;
		valuep += layer_node_count[i];

		weight += (layer_input_count + 1) * layer_node_count[i];
		layer_input_count = layer_node_count[i];
		if (node) node += layer_node_count[i];
	}
}

void ann_node_gradient(char *node, float *gradient, float *input, int input_count, float value, float cost)
{
	float delta, err;
	int i;
	
	if (node) {
		switch (node[0]) {
			default:
			case 0: delta = 0; break; // node is off
			case 1: delta = ann_sigmoid_d(value); break;
			case 2: delta = ann_gaussian_d(value); break;
			case 3: delta = ann_sin_d(value); break;
			case 4: delta = ann_cos_d(value); break;
			case 5: delta = ann_linear_d(value); break;
		}
	}
	else {
		delta = ann_sigmoid_d(value);
	}
	err = cost * delta;

	gradient[0] = err; // bias
	for (i = 0; i < input_count; i++)
		gradient[i + 1] = err * input[i];	
}

void ann_gradients(float *gradient, float *value, float *desired_output, float *weight, char *node, int layer_count, int *layer_node_count)
{
	float *vp, *gp, *wp, *ip;
	char *np = NULL;
	int node_count, input_count;
	int i, j;

	// start from output layer data
	value += layer_node_count[0];

	for (i = 1; i < (layer_count - 1); i++) {
		int weight_count = (layer_node_count[i - 1] + 1) * layer_node_count[i];
		value += layer_node_count[i];
		weight += weight_count;
		gradient += weight_count;
		if (node) node += layer_node_count[i];
	}

	// output layer gradient
	node_count = layer_node_count[layer_count - 1];
	input_count = layer_node_count[layer_count - 2];

	vp = value;
	gp = gradient;
	ip = vp - input_count;
	if (node) np = node;

	for (j = 0; j < node_count; j++) {
		float cost = vp[j] - desired_output[j];
		ann_node_gradient(np, gp, ip, input_count, vp[j], cost);
		gp += input_count + 1;
		if (node) np++;
	}

	// hidden layers gradient
	for (i = (layer_count - 2); i >= 1; i--) {

		float *next_layer_weight = weight;
		float *next_layer_gradient = gradient;
		int k, weight_count, next_node_count = node_count;

		node_count = input_count;
		input_count = layer_node_count[i - 1];
		weight_count = (input_count + 1) * node_count;

		vp = (value -= node_count);
		gp = (gradient -= weight_count);
		ip = vp - input_count;
		if (node) np = (node -= node_count);

		for (j = 0; j < node_count; j++) {

			float *wn = next_layer_weight;
			float *gn = next_layer_gradient;
			float cost = 0;

			// cost from next nodes errors
			for (k = 0; k < next_node_count; k++) {
				cost += gn[0] * wn[1 + j];
				wn += node_count + 1;
				gn += node_count + 1;
			}

			ann_node_gradient(np, gp, ip, input_count, vp[j], cost);
			gp += input_count + 1;
			if (node) np++;
		}

		weight -= weight_count;
	}
}

anaël seghezzi

#11636

April 8, 2017

And similar but with dynamic memory allocation and 2 hidden layers:

#include <ctoy.h>
#include "ann_simple.c"

void ctoy_begin(void)
{
	float *weight, *gradient, *value, desired_output[2];
	int layer_count = 4;
	int layer_node_count[4] = {2, 4, 8, 2};
	float learning_rate = 0.1;
	int epoch_count = 100000;
	int node_count = ann_node_count(layer_count, layer_node_count);
	int weight_count = ann_weight_count(layer_count, layer_node_count);
	int i, j;

	// alloc
	weight   = malloc(weight_count * sizeof(float));
	gradient = malloc(weight_count * sizeof(float));
	value    = malloc(node_count * sizeof(float));

	// weights init
	for (i = 0; i < weight_count; i++)
		weight[i] = m_randf() * 0.1;

	// input / desired output
	value[0] = 0.05f;
	value[1] = 0.1f;
	desired_output[0] = 0.01f;
	desired_output[1] = 0.99f;
	printf("desired output = %f %f\n", desired_output[0], desired_output[1]);

	// first run
	ann_run(value, weight, NULL, layer_count, layer_node_count);
	printf("initial output = %f %f\n", value[node_count - 2], value[node_count - 1]);

	// learning
	for (j = 0; j < epoch_count; j++) {

		// compute gradients
		ann_gradients(
			gradient,
			value, desired_output,
			weight, NULL, layer_count, layer_node_count
			);

		// update weights
		for (i = 0; i < weight_count; i++)
			weight[i] -= gradient[i] * learning_rate;

		// epoch run
		ann_run(value, weight, NULL, layer_count, layer_node_count);
	}

	// learned output
	printf("learned output = %f %f\n\n", value[node_count - 2], value[node_count - 1]);

	// clear
	free(weight);
	free(gradient);
	free(value);
}

void ctoy_main_loop(void)
{
	ctoy_sleep(0, 1000000);
}

void ctoy_end(void)
{}

Edited by anaël seghezzi on April 8, 2017, 4:00pm

Robert Herman

#17356

February 6, 2019

I know the thread is old, but I wanted to thank you for CToy.
It reminds me of my first days of programming in 1978 when it was simpler to just do something. I went through this ANN exercise, and made more progress in 2 hours with neural networks than I have with pre-canned libraries (python), and tutorials. Yes, they get you very far in terms of being able to put something together quickly, but I don't own it like I do when I go back to my C roots and raw code. For this I am grateful. I really like Raylib, but I find myself playing with CToy much more! Thanks again!

anaël seghezzi

#20525

February 18, 2019

You are welcome ! I appreciate it, thank you.

I'm happy being back to C too, I use C-Toy almost everyday in my work to prototype.