# Python code example for a simple neural network
# This is derived from the blog post by Jason Brownlee,
# https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/
# Adapted by Sally Goldin, 26 September 2022
#
# Further adapted for Supervised and Unsupervised Machine Learning by Kane
# Most accurate (determined from another file): Hidden Layer: 5, Learning Rate: 0.900000, Epochs: 900, Accuracy: 0.952381
from random import seed
from random import random
from math import exp
import csv
from tqdm import tqdm
# Initialize a network
# This assumes a single hidden layer
# n_inputs -- number of neurons in the input layer (normally, the number of feature dimensions)
# n_hidden -- number of neurons in the hidden layer
# n_outputs -- number of neurons in the output layer (normally, the number of classes)
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    # generate a set of random weights for each neuron - one for each incoming connection plus the bias value
    hidden_layer = [{'weights':[random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights':[random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network

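# For reference, with n_inputs=2, n_hidden=1 and n_outputs=2 the returned structure
# looks like this (weight values illustrative, not the actual random draws):
#   [[{'weights': [0.13, 0.85, 0.76]}],                       # hidden layer: 2 weights + bias per neuron
#    [{'weights': [0.42, 0.25]}, {'weights': [0.49, 0.65]}]]  # output layer: 1 weight + bias per neuron
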
# Calculate neuron activation for an input
# activation = sum(weight_i * input_i) + bias
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights)-1):
        activation += weights[i] * inputs[i]
    return activation

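# Worked example (hypothetical values): weights = [0.4, 0.6, 0.1] (last entry is the bias)
# and inputs = [1.0, 2.0] give activation = 0.1 + 0.4*1.0 + 0.6*2.0 = 1.7
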
# Transfer (scale) neuron activation using sigmoid function
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

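# For reference: transfer(0.0) == 0.5, and large positive/negative activations
# saturate toward 1.0 and 0.0 respectively (e.g. transfer(5.0) is roughly 0.993)
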
# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Calculate the derivative of a neuron output
# This assumes we're using the sigmoid transfer function
def transfer_derivative(output):
    return output * (1.0 - output)

# Backpropagate error and store in neurons
# Start at the output layer
# Calculate the difference between the output generated by the
# network and the true values (expected) -- this is sometimes
# called the loss
# Propagate backward through the layers, accumulating the errors
# based on the weights
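# Concretely, with the sigmoid transfer above, each neuron stores
#   output layer:  delta_j = (output_j - expected_j) * output_j * (1 - output_j)
#   hidden layer:  delta_j = (sum_k w_jk * delta_k) * output_j * (1 - output_j)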
def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network)-1:  # hidden layers
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])  # weighted sum of errors
                errors.append(error)
        else:
            for j in range(len(layer)):  # output layer
                neuron = layer[j]
                errors.append(neuron['output'] - expected[j])  # simple difference between generated and true value
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# Update network weights with error
# new_weight = old_weight - learning_rate * delta * input (where delta = error * transfer derivative)
# This happens after forward and backward propagation are complete
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] -= l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] -= l_rate * neuron['delta']

# Train a network for a fixed number of epochs
# The 'train' argument is the input data, assumed to have a value for each
# feature dimension, plus a numeric class label
# l_rate is the learning rate, normally a small value < 1
# n_epoch is how many epochs to train
# n_outputs is the number of output classes
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1  # these two statements create a 1-Hot code for the expected class
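            # e.g. with n_outputs = 3 and class label row[-1] == 1, expected becomes [0, 1, 0]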
            sum_error += sum([(expected[i]-outputs[i])**2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)

# Make a prediction with a network
# Just do forward propagation, using the trained weights
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))  # note we choose the output with the maximum value

# Normalize the data
# except for the last column, which is assumed to be the class
def normalize(dataset):
    for i in range(len(dataset[0])-1):
        col = [row[i] for row in dataset]
        min_value = min(col)
        max_value = max(col)
        for row in dataset:
            row[i] = (row[i] - min_value) / (max_value - min_value)

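# e.g. a feature column [2.0, 4.0, 6.0] is rescaled to [0.0, 0.5, 1.0]
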
def main():
    seed(1)
    # import the data from FlowersCombinedNN.csv
    # this is a comma-separated file with 5 columns, the last of which is the class
    # the first row is a header row
    # read the data into a list of lists
    dataset = []
    with open('FlowersCombinedNN.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            dataset.append(row)
    # separate the header row
    header = dataset[0]
    dataset = dataset[1:]
    # convert strings to floats for all columns except the last
    for i in range(len(dataset[0])-1):
        for row in dataset:
            row[i] = float(row[i].strip())
    # convert the class column to integer labels
    classdict = {}
    classnum = 0
    for row in dataset:
        if row[-1] not in classdict:
            classdict[row[-1]] = classnum
            classnum += 1
        row[-1] = classdict[row[-1]]
    # normalize the data
    normalize(dataset)
    # split the data into training and test sets 80:20 (every fifth row goes to the test set)
    trainset = []
    testset = []
    for i in range(len(dataset)):
        if i % 5 == 0:
            testset.append(dataset[i])
        else:
            trainset.append(dataset[i])
    n_inputs = len(trainset[0]) - 1
    n_outputs = len(set([row[-1] for row in trainset]))
    network = initialize_network(n_inputs, 5, n_outputs)
    train_network(network, trainset, 0.9, 900, n_outputs)
    # test making predictions with the network
    # print out accuracy and confusion matrix
    # initialize the confusion matrix
    confusion = []
    for i in range(n_outputs):
        confusion.append([0 for i in range(n_outputs)])
    correct = 0
    for row in tqdm(testset):
        prediction = predict(network, row)
        expected = row[-1]
        confusion[expected][prediction] += 1
        if prediction == expected:
            correct += 1
    print("Accuracy: ", correct/len(testset))
    print("Confusion matrix:")
    print(confusion)
    # visualize the confusion matrix as a labeled heat map
    import matplotlib.pyplot as plt
    import numpy as np
    fig, ax = plt.subplots()
    im = ax.imshow(confusion)
    ax.set_xticks(np.arange(n_outputs))
    ax.set_yticks(np.arange(n_outputs))
    ax.set_xticklabels(classdict.keys())
    ax.set_yticklabels(classdict.keys())
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    for i in range(n_outputs):
        for j in range(n_outputs):
            text = ax.text(j, i, confusion[i][j], ha="center", va="center", color="w")
    ax.set_title("Confusion Matrix")
    fig.tight_layout()
    plt.show()

if __name__ == "__main__":
    main()