I'm writing a neural network in Python, following the example here. It seems the backpropagation algorithm isn't working, given that the network can't produce correct values (within a margin of error) even after ten thousand training iterations. Specifically, I'm training it to compute the sine function in the following example:
```python
import numpy as np

class Neuralnet:
    def __init__(self, neurons):
        self.weights = []
        self.inputs = []
        self.outputs = []
        self.errors = []
        self.rate = .1
        for layer in range(len(neurons)):
            self.inputs.append(np.empty(neurons[layer]))
            self.outputs.append(np.empty(neurons[layer]))
            self.errors.append(np.empty(neurons[layer]))
        for layer in range(len(neurons) - 1):
            self.weights.append(
                np.random.normal(
                    scale=1/np.sqrt(neurons[layer]),
                    size=[neurons[layer], neurons[layer + 1]]
                )
            )

    def feedforward(self, inputs):
        self.inputs[0] = inputs
        for layer in range(len(self.weights)):
            self.outputs[layer] = np.tanh(self.inputs[layer])
            self.inputs[layer + 1] = np.dot(self.weights[layer].T, self.outputs[layer])
        self.outputs[-1] = np.tanh(self.inputs[-1])

    def backpropagate(self, targets):
        gradient = 1 - self.outputs[-1] * self.outputs[-1]
        self.errors[-1] = gradient * (self.outputs[-1] - targets)
        for layer in reversed(range(len(self.errors) - 1)):
            gradient = 1 - self.outputs[layer] * self.outputs[layer]
            self.errors[layer] = gradient * np.dot(self.weights[layer], self.errors[layer + 1])
        for layer in range(len(self.weights)):
            self.weights[layer] -= self.rate * np.outer(self.outputs[layer], self.errors[layer + 1])

def xor_example():
    net = Neuralnet([2, 2, 1])
    for step in range(100000):
        net.feedforward([0, 0])
        net.backpropagate([-1])
        net.feedforward([0, 1])
        net.backpropagate([1])
        net.feedforward([1, 0])
        net.backpropagate([1])
        net.feedforward([1, 1])
        net.backpropagate([-1])
    net.feedforward([1, 1])
    print(net.outputs[-1])

def identity_example():
    net = Neuralnet([1, 3, 1])
    for step in range(100000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([np.tanh(x)])
    net.feedforward([-2])
    print(net.outputs[-1])

def sine_example():
    net = Neuralnet([1, 6, 1])
    for step in range(100000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([np.tanh(np.sin(x))])
    net.feedforward([3])
    print(net.outputs[-1])

sine_example()
```
The output never gets anywhere close to tanh(sin(3)) = 0.140190616. I suspected a bug involving wrong indexing or alignment, but NumPy isn't raising any errors of that kind. Any hints on where I went wrong?
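One way to verify the backpropagation independently is a numerical gradient check. Below is a minimal sketch against the Neuralnet class above, assuming the squared-error loss implied by the update rule (a debugging aid only, not part of the training code):

```python
import copy

# Compare the analytic gradient used by backpropagate() with a central finite
# difference of the loss 0.5 * (output - target)**2 for a single weight.
net = Neuralnet([1, 6, 1])
x, target = 0.5, np.tanh(np.sin(0.5))
w_orig = copy.deepcopy(net.weights)

def loss(weights):
    net.weights = weights
    net.feedforward([x])
    return 0.5 * np.sum((net.outputs[-1] - [target]) ** 2)

# analytic gradient of weights[0][0, 0], as used in the weight update
net.weights = copy.deepcopy(w_orig)
net.feedforward([x])
net.backpropagate([target])          # fills net.errors; the weight update itself is discarded
analytic = np.outer(net.outputs[0], net.errors[1])[0, 0]

# numerical gradient by central differences
eps = 1e-5
w_plus, w_minus = copy.deepcopy(w_orig), copy.deepcopy(w_orig)
w_plus[0][0, 0] += eps
w_minus[0][0, 0] -= eps
numeric = (loss(w_plus) - loss(w_minus)) / (2 * eps)

print(analytic, numeric)             # should agree to ~1e-6 if backprop is correct
```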
EDIT: I forgot to add the bias neurons. Here is the updated code:
```python
import numpy as np

class Neuralnet:
    def __init__(self, neurons):
        self.weights = []
        self.outputs = []
        self.inputs = []
        self.errors = []
        self.offsets = []
        self.rate = .01
        for layer in range(len(neurons) - 1):
            self.weights.append(
                np.random.normal(
                    scale=1/np.sqrt(neurons[layer]),
                    size=[neurons[layer], neurons[layer + 1]]
                )
            )
            self.outputs.append(np.empty(neurons[layer]))
            self.inputs.append(np.empty(neurons[layer]))
            self.errors.append(np.empty(neurons[layer]))
            self.offsets.append(np.random.normal(scale=1/np.sqrt(neurons[layer]), size=neurons[layer + 1]))
        self.inputs.append(np.empty(neurons[-1]))
        self.errors.append(np.empty(neurons[-1]))

    def feedforward(self, inputs):
        self.inputs[0] = inputs
        for layer in range(len(self.weights)):
            self.outputs[layer] = np.tanh(self.inputs[layer])
            self.inputs[layer + 1] = self.offsets[layer] + np.dot(self.weights[layer].T, self.outputs[layer])

    def backpropagate(self, targets):
        self.errors[-1] = self.inputs[-1] - targets
        for layer in reversed(range(len(self.errors) - 1)):
            gradient = 1 - self.outputs[layer] * self.outputs[layer]
            self.errors[layer] = gradient * np.dot(self.weights[layer], self.errors[layer + 1])
        for layer in range(len(self.weights)):
            self.weights[layer] -= self.rate * np.outer(self.outputs[layer], self.errors[layer + 1])
            self.offsets[layer] -= self.rate * self.errors[layer + 1]

def sine_example():
    net = Neuralnet([1, 5, 1])
    for step in range(10000):
        x = np.random.uniform(-5, 5)
        net.feedforward([x])
        net.backpropagate([np.sin(x)])
    net.feedforward([np.pi])
    print(net.inputs[-1])

def xor_example():
    net = Neuralnet([2, 2, 1])
    for step in range(10000):
        net.feedforward([0, 0])
        net.backpropagate([-1])
        net.feedforward([0, 1])
        net.backpropagate([1])
        net.feedforward([1, 0])
        net.backpropagate([1])
        net.feedforward([1, 1])
        net.backpropagate([-1])
    net.feedforward([1, 1])
    print(net.outputs[-1])

def identity_example():
    net = Neuralnet([1, 3, 1])
    for step in range(10000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([x])
    net.feedforward([-2])
    print(net.outputs[-1])

identity_example()
```
Anton:
I think you are training the NN in the wrong way. You have a loop over 10000 iterations and feed a new sample in each iteration. The NN will never get trained in this case.

(This statement is wrong! See the update below.)
What you need to do is to generate a large array of true samples Y = sin(X), give it to your network ONCE, and iterate over the training set forwards and backwards in order to minimize the cost function. To check the algorithm, you may want to plot the cost function against the iteration number and make sure it goes down.
Another important point is the initialization of the weights. Your numbers are pretty large, so the network will take a lot of time to converge, especially when using low learning rates. It's good practice to generate the initial weights uniformly in some small range [-eps .. eps].
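In the code below this comes down to a single line per layer (with eps = 0.12 and one extra column per layer for the bias unit):

```python
import numpy as np

# Uniform weight initialization in [-eps, +eps]; this is exactly what __init__
# in the code further down does. The +1 column holds the bias weight.
neurons = [1, 6, 1]   # network structure
eps = 0.12            # half-width of the uniform distribution
weights = [np.random.uniform(-eps, eps, size=(neurons[l + 1], neurons[l] + 1))
           for l in range(len(neurons) - 1)]
```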
In my code I implemented two different activation functions: sigmoid() and tanh(). You need to scale your data into the working range of the selected function: [0 .. 1] for sigmoid() and [-1 .. 1] for tanh() (in the code below this scaling is applied to the target values).
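For the noisy sine target, this scaling shows up in the MAIN PART of the code below as follows (kx is the noise amplitude, so the raw target lies within [-(1+kx) .. 1+kx]):

```python
# Excerpt from the MAIN PART below: map the noisy target sin(x) + noise,
# which lies in [-(1+kx), 1+kx], into the output range of the activation.
if activation == 'sigmoid':
    Y_scaled = (Y / (1 + kx) + 1) / 2.0   # into [0..1]
elif activation == 'tanh':
    Y_scaled = Y / (1 + kx)               # into [-1..1]
```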
Here are some images showing the cost function and the resulting predictions for the sigmoid() and tanh() activation functions:
As you can see, the sigmoid() activation gives somewhat better results than tanh().
I also got better predictions with the [1, 6, 1] network than with a bigger 4-layer network [1, 6, 4, 1], so the size of the NN is not always the crucial factor. Here is the prediction for the mentioned 4-layer network:
Here is my code with some comments. I tried to use your notation where possible.
```python
import numpy as np
import math
import matplotlib.pyplot as plt

class Neuralnet:
    def __init__(self, neurons, activation):
        self.weights = []
        self.inputs = []
        self.outputs = []
        self.errors = []
        self.rate = 0.5
        self.activation = activation    # sigmoid or tanh

        self.neurons = neurons
        self.L = len(self.neurons)      # number of layers

        eps = 0.12                      # range for uniform distribution -eps..+eps
        for layer in range(len(neurons)-1):
            self.weights.append(np.random.uniform(-eps, eps, size=(neurons[layer+1], neurons[layer]+1)))

    ###################################################################################################
    def train(self, X, Y, iter_count):
        m = X.shape[0]

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))
            self.errors.append(np.empty([m, self.neurons[layer]]))
            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        # accumulate the cost function
        J_history = np.zeros([iter_count, 1])

        for i in range(iter_count):
            self.feedforward(X)
            J = self.cost(Y, self.outputs[self.L-1])
            J_history[i, 0] = J
            self.backpropagate(Y)

        # plot the cost function to check the descent
        plt.plot(J_history)
        plt.show()

    ###################################################################################################
    def cost(self, Y, H):
        J = np.sum(np.sum(np.power((Y - H), 2), axis=0))/(2*m)
        return J

    ###################################################################################################
    def feedforward(self, X):
        m = X.shape[0]

        self.outputs[0] = np.concatenate((np.ones([m, 1]), X), axis=1)

        for i in range(1, self.L):
            self.inputs[i] = np.dot(self.outputs[i-1], self.weights[i-1].T)
            if (self.activation == 'sigmoid'):
                output_temp = self.sigmoid(self.inputs[i])
            elif (self.activation == 'tanh'):
                output_temp = np.tanh(self.inputs[i])

            if (i < self.L - 1):
                self.outputs[i] = np.concatenate((np.ones([m, 1]), output_temp), axis=1)
            else:
                self.outputs[i] = output_temp

    ###################################################################################################
    def backpropagate(self, Y):
        self.errors[self.L-1] = self.outputs[self.L-1] - Y

        for i in range(self.L - 2, 0, -1):
            if (self.activation == 'sigmoid'):
                self.errors[i] = np.dot(self.errors[i+1], self.weights[i][:, 1:]) * self.sigmoid_prime(self.inputs[i])
            elif (self.activation == 'tanh'):
                self.errors[i] = np.dot(self.errors[i+1], self.weights[i][:, 1:]) * (1 - self.outputs[i][:, 1:]*self.outputs[i][:, 1:])

        for i in range(0, self.L-1):
            grad = np.dot(self.errors[i+1].T, self.outputs[i]) / m
            self.weights[i] = self.weights[i] - self.rate*grad

    ###################################################################################################
    def sigmoid(self, z):
        s = 1.0/(1.0 + np.exp(-z))
        return s

    ###################################################################################################
    def sigmoid_prime(self, z):
        s = self.sigmoid(z)*(1 - self.sigmoid(z))
        return s

    ###################################################################################################
    def predict(self, X, weights):
        m = X.shape[0]

        self.inputs = []
        self.outputs = []
        self.weights = weights

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))
            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        self.feedforward(X)
        return self.outputs[self.L-1]

###################################################################################################
# MAIN PART

activation1 = 'sigmoid'     # the input should be scaled into [ 0..1]
activation2 = 'tanh'        # the input should be scaled into [-1..1]

activation = activation1

net = Neuralnet([1, 6, 1], activation)  # structure of the NN and its activation function

##########################################################################################
# TRAINING

m = 1000    # size of the training set
X = np.linspace(0, 4*math.pi, num=m).reshape(m, 1)  # input training set

Y = np.sin(X)   # target

kx = 0.1    # noise parameter
noise = (2.0*np.random.uniform(0, kx, m) - kx).reshape(m, 1)
Y = Y + noise   # noisy target

# scaling of the target depending on the activation function
if (activation == 'sigmoid'):
    Y_scaled = (Y/(1+kx) + 1)/2.0
elif (activation == 'tanh'):
    Y_scaled = Y/(1+kx)

# number of the iterations for the training stage
iter_count = 20000
net.train(X, Y_scaled, iter_count)  # training

# gained weights
trained_weights = net.weights

##########################################################################################
# PREDICTION

m_new = 40  # size of the prediction set
X_new = np.linspace(0, 4*math.pi, num=m_new).reshape(m_new, 1)

Y_new = net.predict(X_new, trained_weights)  # prediction

# rescaling of the result
if (activation == 'sigmoid'):
    Y_new = (2.0*Y_new - 1.0) * (1+kx)
elif (activation == 'tanh'):
    Y_new = Y_new * (1+kx)

# visualization
plt.plot(X, Y)
plt.plot(X_new, Y_new, 'ro')
plt.show()

raw_input('press any key to exit')
```
UPDATE
I would like to take back my statement about the training method used in your code. The network is indeed trained using just one sample per iteration. I got interesting results with online training using both the sigmoid and tanh activation functions:
Online training with Sigmoid (cost function and predictions)

Online training with Tanh (cost function and predictions)
As can be seen, choosing Sigmoid as the activation function gives better performance. The cost function doesn't look as nice as during the offline training, but at least it tends to go down.
I also plotted the cost function of your implementation, and it looks pretty jerky as well:
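(Such a plot can be obtained by recording the squared error during the online loop; a minimal sketch of mine, using the Neuralnet class from your edit above with its linear output layer:)

```python
import matplotlib.pyplot as plt

# Record the per-sample squared error during online training and plot it
# averaged over blocks of 1000 samples so the trend is visible.
net = Neuralnet([1, 5, 1])
costs = []
for step in range(100000):
    x = np.random.uniform(-5, 5)
    net.feedforward([x])
    target = np.sin(x)
    costs.append(float(np.sum((net.inputs[-1] - [target]) ** 2)) / 2)
    net.backpropagate([target])

costs = np.mean(np.reshape(costs, (-1, 1000)), axis=1)   # smooth the curve
plt.plot(costs)
plt.show()
```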
It may be a good idea to try your code with sigmoid or even a ReLU activation function.
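For instance, a ReLU pair that could be dropped into the class below next to sigmoid()/sigmoid_prime() might look like this (my sketch, not part of the code below; a bounded target such as the sine would still need a linear or bounded output layer):

```python
# Possible ReLU activation pair for the hidden layers; these would be added as
# methods of the Neuralnet class alongside sigmoid()/sigmoid_prime().
def relu(self, z):
    return np.maximum(0.0, z)

def relu_prime(self, z):
    return (z > 0).astype(float)
```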
Here is the updated source code. To switch between the online and offline training modes, just change the method variable.
```python
import numpy as np
import math
import matplotlib.pyplot as plt

class Neuralnet:
    def __init__(self, neurons, activation):
        self.weights = []
        self.inputs = []
        self.outputs = []
        self.errors = []
        self.rate = 0.2
        self.activation = activation    # sigmoid or tanh

        self.neurons = neurons
        self.L = len(self.neurons)      # number of layers

        eps = 0.12                      # range for uniform distribution -eps..+eps
        for layer in range(len(neurons)-1):
            self.weights.append(np.random.uniform(-eps, eps, size=(neurons[layer+1], neurons[layer]+1)))

    ###################################################################################################
    def train(self, X, Y, iter_count):
        m = X.shape[0]

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))
            self.errors.append(np.empty([m, self.neurons[layer]]))
            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        # accumulate the cost function
        J_history = np.zeros([iter_count, 1])

        for i in range(iter_count):
            self.feedforward(X)
            J = self.cost(Y, self.outputs[self.L-1])
            J_history[i, 0] = J
            self.backpropagate(Y)

        # plot the cost function to check the descent
        # plt.plot(J_history)
        # plt.show()

    ###################################################################################################
    def cost(self, Y, H):
        J = np.sum(np.sum(np.power((Y - H), 2), axis=0))/(2*m)
        return J

    ###################################################################################################
    def cost_online(self, min_x, max_x, iter_number):
        h_arr = np.zeros([iter_number, 1])
        y_arr = np.zeros([iter_number, 1])

        for step in range(iter_number):
            x = np.random.uniform(min_x, max_x, 1).reshape(1, 1)

            self.feedforward(x)
            h_arr[step, 0] = self.outputs[-1]
            y_arr[step, 0] = np.sin(x)

        J = np.sum(np.sum(np.power((y_arr - h_arr), 2), axis=0))/(2*iter_number)
        return J

    ###################################################################################################
    def feedforward(self, X):
        m = X.shape[0]

        self.outputs[0] = np.concatenate((np.ones([m, 1]), X), axis=1)

        for i in range(1, self.L):
            self.inputs[i] = np.dot(self.outputs[i-1], self.weights[i-1].T)
            if (self.activation == 'sigmoid'):
                output_temp = self.sigmoid(self.inputs[i])
            elif (self.activation == 'tanh'):
                output_temp = np.tanh(self.inputs[i])

            if (i < self.L - 1):
                self.outputs[i] = np.concatenate((np.ones([m, 1]), output_temp), axis=1)
            else:
                self.outputs[i] = output_temp

    ###################################################################################################
    def backpropagate(self, Y):
        self.errors[self.L-1] = self.outputs[self.L-1] - Y

        for i in range(self.L - 2, 0, -1):
            if (self.activation == 'sigmoid'):
                self.errors[i] = np.dot(self.errors[i+1], self.weights[i][:, 1:]) * self.sigmoid_prime(self.inputs[i])
            elif (self.activation == 'tanh'):
                self.errors[i] = np.dot(self.errors[i+1], self.weights[i][:, 1:]) * (1 - self.outputs[i][:, 1:]*self.outputs[i][:, 1:])

        for i in range(0, self.L-1):
            grad = np.dot(self.errors[i+1].T, self.outputs[i]) / m
            self.weights[i] = self.weights[i] - self.rate*grad

    ###################################################################################################
    def sigmoid(self, z):
        s = 1.0/(1.0 + np.exp(-z))
        return s

    ###################################################################################################
    def sigmoid_prime(self, z):
        s = self.sigmoid(z)*(1 - self.sigmoid(z))
        return s

    ###################################################################################################
    def predict(self, X, weights):
        m = X.shape[0]

        self.inputs = []
        self.outputs = []
        self.weights = weights

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))
            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        self.feedforward(X)
        return self.outputs[self.L-1]

###################################################################################################
# MAIN PART

activation1 = 'sigmoid'     # the input should be scaled into [0..1]
activation2 = 'tanh'        # the input should be scaled into [-1..1]

activation = activation1

net = Neuralnet([1, 6, 1], activation)  # structure of the NN and its activation function

method1 = 'online'
method2 = 'offline'

method = method1

kx = 0.1    # noise parameter

###################################################################################################
# TRAINING

if (method == 'offline'):

    m = 1000    # size of the training set
    X = np.linspace(0, 4*math.pi, num=m).reshape(m, 1)  # input training set

    Y = np.sin(X)   # target

    noise = (2.0*np.random.uniform(0, kx, m) - kx).reshape(m, 1)
    Y = Y + noise   # noisy target

    # scaling of the target depending on the activation function
    if (activation == 'sigmoid'):
        Y_scaled = (Y/(1+kx) + 1)/2.0
    elif (activation == 'tanh'):
        Y_scaled = Y/(1+kx)

    # number of the iterations for the training stage
    iter_count = 20000
    net.train(X, Y_scaled, iter_count)  # training

elif (method == 'online'):

    sampling_count = 100000  # number of samplings during the training stage

    m = 1   # batch size

    iter_count = sampling_count/m

    for layer in range(net.L):
        net.inputs.append(np.empty([m, net.neurons[layer]]))
        net.errors.append(np.empty([m, net.neurons[layer]]))
        if (layer < net.L - 1):
            net.outputs.append(np.empty([m, net.neurons[layer]+1]))
        else:
            net.outputs.append(np.empty([m, net.neurons[layer]]))

    J_history = []
    step_history = []

    for i in range(iter_count):
        X = np.random.uniform(0, 4*math.pi, m).reshape(m, 1)

        Y = np.sin(X)   # target
        noise = (2.0*np.random.uniform(0, kx, m) - kx).reshape(m, 1)
        Y = Y + noise   # noisy target

        # scaling of the target depending on the activation function
        if (activation == 'sigmoid'):
            Y_scaled = (Y/(1+kx) + 1)/2.0
        elif (activation == 'tanh'):
            Y_scaled = Y/(1+kx)

        net.feedforward(X)
        net.backpropagate(Y_scaled)

        if (np.remainder(i, 1000) == 0):
            J = net.cost_online(0, 4*math.pi, 1000)
            J_history.append(J)
            step_history.append(i)

    plt.plot(step_history, J_history)
    plt.title('Batch size ' + str(m) + ', rate ' + str(net.rate) + ', samples ' + str(sampling_count))
    #plt.ylim([0, 0.1])
    plt.show()

# gained weights
trained_weights = net.weights

##########################################################################################
# PREDICTION

m_new = 40  # size of the prediction set
X_new = np.linspace(0, 4*math.pi, num=m_new).reshape(m_new, 1)

Y_new = net.predict(X_new, trained_weights)  # prediction

# rescaling of the result
if (activation == 'sigmoid'):
    Y_new = (2.0*Y_new - 1.0) * (1+kx)
elif (activation == 'tanh'):
    Y_new = Y_new * (1+kx)

# visualization
# fake sine curve to show the ideal signal
if (method == 'online'):
    X = np.linspace(0, 4*math.pi, num=100)
    Y = np.sin(X)

plt.plot(X, Y)
plt.plot(X_new, Y_new, 'ro')
if (method == 'online'):
    plt.title('Batch size ' + str(m) + ', rate ' + str(net.rate) + ', samples ' + str(sampling_count))
    plt.ylim([-1.5, 1.5])
plt.show()

raw_input('press any key to exit')
```
Now I have some remarks on your current code:
Your sine function looks like this:
```python
def sine_example():
    net = Neuralnet([1, 6, 1])
    for step in range(100000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([np.tanh(np.sin(x))])
    net.feedforward([3])
    print(net.outputs[-1])
```
I don't know why you use tanh in the target. If you really want to use tanh of the sine as the target, you need to scale it to [-1 .. 1], because tanh(sin(x)) only returns values in the range [-0.76 .. 0.76].
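Since |sin(x)| ≤ 1, the maximum of tanh(sin(x)) is tanh(1) ≈ 0.7616, so dividing by tanh(1) stretches the target over the full output range (a sketch for the loop in your sine_example()):

```python
# Rescale tanh(sin(x)) from [-tanh(1), tanh(1)] to the full [-1, 1] range of tanh.
target = np.tanh(np.sin(x)) / np.tanh(1.0)
net.backpropagate([target])
```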
The next thing is the range of your training set. You use x = np.random.normal() to generate the samples. Here is the distribution of such an input:

After that you want your network to predict the sine of 3, but the network has hardly ever seen this number during the training stage. I would use a uniform distribution over a wider range for the sample generation instead.
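For example (your updated code already does something similar with np.random.uniform(-5, 5)):

```python
# Draw training inputs uniformly from a range that covers the point you want
# to predict (x = 3 here), instead of from a standard normal distribution.
x = np.random.uniform(-5, 5)
net.feedforward([x])
net.backpropagate([np.tanh(np.sin(x)) / np.tanh(1.0)])   # rescaled target, see above
```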