# Subsample the data for more efficient code execution in this exercise
num_training = 5000
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]

# Reshape the image data into rows: every image becomes one flat feature vector
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)
(5000, 3072) (500, 3072)
训练部分代码:
1 2 3 4 5
# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
1 2 3
def train(self, X, y):
    """Memorize the training set; no other work happens at training time.

    Args:
        X: training data, stored unchanged as ``self.X_train``.
        y: training labels, stored unchanged as ``self.y_train``.
    """
    self.X_train = X
    self.y_train = y
很明显,训练部分就是将训练数据存下来了而已,没有进行任何操作。
测试部分(使用 L2 distance):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Let's compare how fast the implementations are
def time_function(f, *args):
    """Call ``f(*args)`` once and return the elapsed wall-clock seconds.

    Args:
        f: callable to time.
        *args: positional arguments forwarded to ``f``.

    Returns:
        Elapsed time in seconds as a float. The return value of ``f`` is
        discarded.
    """
    import time

    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments.
    tic = time.perf_counter()
    f(*args)
    toc = time.perf_counter()
    return toc - tic
# Time each of the three distance implementations on the same test data.
two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
print('Two loop version took %f seconds' % two_loop_time)

one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
print('One loop version took %f seconds' % one_loop_time)

no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
print('No loop version took %f seconds' % no_loop_time)
1 2 3 4
# two loop
for i in range(num_test):
    for j in range(num_train):
        # L2 distance between test point i and training point j.
        dists[i, j] = np.sqrt(np.sum(np.square(self.X_train[j, :] - X[i, :])))
1 2 3 4
# one loop
for i in range(num_test):
    # Broadcast test point i against every training row, then reduce over
    # the feature axis to get one L2 distance per training point.
    dists[i, :] = np.sqrt(np.sum(np.square(self.X_train - X[i, :]), axis=1))
由于我们可以选择不同的 k 值,所以可以使用交叉验证来测试不同k值下,knn 在数据集中的准确率,于是可以选择出最佳的 k 值。
对每一个k值,不能只进行一次测试,误差可能导致结果选择不准确,所以使用5折交叉验证。将训练数据分成五等份(随机划分),每次使用一份作为验证集,其余的作为训练集,进行五次计算,平均值作为其最终的准确率。
其中 $L_i = \sum_{j \neq y_i} \max(0,\ f_j - f_{y_i} + \Delta)$ 就是第 i 个样本的损失函数。判断一个样本属于哪一类时,看的是 f 函数输出中得分最大的那一维,所以这个损失函数要求正确类的得分比其它每一类的得分都至少高出 Delta;达不到这个间隔的类,才会按差值计入损失。这个损失函数一般称为 hinge loss。
加入Regularization :
如果只用上面的损失函数来进行计算,它对权值 W 的取值大小是不敏感的,也就是 W 取值很大或很小时都能得到相同的结果,加入正则化,可以迫使 W 取较小的值,这里当然也有一定防止过拟合的作用。
正则函数定义(L2 正则): $R(W) = \sum_k \sum_l W_{k,l}^2$
正则化后的损失函数为 $L = \frac{1}{N} \sum_i L_i + \lambda R(W)$,其中 R(W) 为正则函数。在交叉验证中,Delta 的选取其实并不十分重要(一般等于 1.0),因为算法总是能够适应过来,更为重要的是正则化强度 lambda 的选取。
jupyter notebook :
数据预处理 :
这里数据集选取的大小就不像 knn 这么保守:训练集 49000 个样本,验证集 1000 个,测试集 1000 个,另外取 500 个样本作为开发集(dev)。
1 2 3 4 5
# As a sanity check, print out the shapes of the data
print('Training data shape: ', X_train.shape)
print('Validation data shape: ', X_val.shape)
print('Test data shape: ', X_test.shape)
print('dev data shape: ', X_dev.shape)
Training data shape: (49000, 3072) Validation data shape: (1000, 3072) Test data shape: (1000, 3072) dev data shape: (500, 3072)
# first: compute the image mean based on the training data
mean_image = np.mean(X_train, axis=0)

# second: subtract the mean image from train and test data
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
# Evaluate the best svm on test set
y_test_pred = best_svm.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)
linear SVM on raw pixels final test set accuracy: 0.370000
# Print the shape of every data split together with its label vector.
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)
Train data shape: (49000, 3073) Train labels shape: (49000,) Validation data shape: (1000, 3073) Validation labels shape: (1000,) Test data shape: (1000, 3073) Test labels shape: (1000,) dev data shape: (500, 3073) dev labels shape: (500,)
这里使用的数据同样是 CIFAR10 ,
代码首先读入数据,并将每一幅图片都转化为行向量。
将数据划分为四部分,和 SVM 时操作一样。
零均值化。
使用 bias trick,加入一列 1 向量。
损失函数和梯度 :
首先实现循环版本的代码,
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
m = X.shape[0]  # number of samples
c = W.shape[1]  # number of classes
for i in range(m):
    score = X[i].dot(W)
    # Shift so the largest score is 0: softmax is unchanged by the shift,
    # but np.exp can no longer overflow.
    score_mean = score - np.max(score)
    score_exp = np.exp(score_mean)
    sum_se = np.sum(score_exp)
    # Cross-entropy of sample i: log(sum_j exp(f_j)) - f_{y_i}.
    loss += (np.log(sum_se) - score_mean[y[i]])
    for j in range(c):
        # dL_i/dW_j = (p_j - 1[j == y_i]) * x_i, with p_j the softmax output.
        dW[:, j] += (score_exp[j] / sum_se) * X[i]
        if j == y[i]:
            dW[:, j] -= X[i]

# Average over the samples, then add the L2 penalty and its gradient.
loss /= m
loss += 0.5 * reg * np.sum(W * W)
dW /= m
dW += reg * W
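循环中,将计算分为了每个样本分别计算,利用了损失函数的变形来编写,其中,$L_i = -\log\frac{e^{f_{y_i}}}{\sum_j e^{f_j}} = -f_{y_i} + \log\sum_j e^{f_j}$,对应的梯度为 $\frac{\partial L_i}{\partial W_j} = (p_j - \mathbb{1}[j = y_i])\, x_i$,其中 $p_j = \frac{e^{f_j}}{\sum_k e^{f_k}}$。计算前先让得分减去其最大值,这不改变 softmax 的结果,但可以避免指数运算溢出。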
循环中就体现了上面的式子,在最后再加入正则化即可。
1 2 3 4 5 6 7
# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))
loss: 2.367492 sanity check: 2.302585
上面这一段使用了随机的初始权值来对 loss 进行了计算,然后与 -log(0.1) 进行了比较。在期望下两个值应该十分接近:权值很小且随机时,各类的得分几乎相等,每一个样本在 softmax 之后它的每一类划分的概率都约为 0.1,所以取负对数之后的损失就接近 -log(0.1)。
# Grid-search learning rate and regularization strength, keeping the model
# with the highest validation accuracy.
it_num = 1500
for lr in learning_rates:
    for rs in regularization_strengths:
        softmax = Softmax()
        softmax.train(X_train, y_train, lr, rs, it_num)
        y_train_pred = softmax.predict(X_train)
        y_val_pred = softmax.predict(X_val)
        ytr_acc = np.mean(y_train == y_train_pred)
        yval_acc = np.mean(y_val == y_val_pred)
        results[(lr, rs)] = (ytr_acc, yval_acc)
        if yval_acc > best_val:
            best_val = yval_acc
            # Keep the trained model itself; it is evaluated on the test set later.
            best_softmax = softmax

print('best validation accuracy achieved during cross-validation: %f' % best_val)
best validation accuracy achieved during cross-validation: 0.372000
得到交叉验证集中的最高准确率为 37.2%。
1 2 3 4
# Evaluate the best softmax on test set
y_test_pred = best_softmax.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy, ))
softmax on raw pixels final test set accuracy: 0.369000
# Grid-search hidden size, learning rate and regularization strength, keeping
# the network with the highest validation accuracy.
for hs in hidden_size:
    for lr in learning_rate:
        for re in reg:
            net = TwoLayerNet(input_size, hs, num_classes)

            # Train the network
            stats = net.train(X_train, y_train, X_val, y_val,
                              num_iters=4000, batch_size=400,
                              learning_rate=lr, learning_rate_decay=0.99,
                              reg=re, verbose=True)

            # Predict on the validation set
            val_acc = (net.predict(X_val) == y_val).mean()
            print('Validation accuracy: ', val_acc, ' hs = ', hs, ' lr = ', lr, ' re = ', re)
            if val_acc > best_val:
                best_val = val_acc
                best_net = net
Validation accuracy: 0.528 hs = 500 lr = 0.0005 re = 0.2
这里的训练过程非常慢,事实上我看效果不好的参数就主动停止了它。已经可以预见在网络更为复杂的时候,光用 cpu 来跑神经网络的困难所在了。
num_color_bins = 10  # Number of bins in the color histogram
feature_fns = [hog_feature, lambda img: color_histogram_hsv(img, nbin=num_color_bins)]
X_train_feats = extract_features(X_train, feature_fns, verbose=True)
X_val_feats = extract_features(X_val, feature_fns)
X_test_feats = extract_features(X_test, feature_fns)

# Preprocessing: Subtract the mean feature
mean_feat = np.mean(X_train_feats, axis=0, keepdims=True)
X_train_feats -= mean_feat
X_val_feats -= mean_feat
X_test_feats -= mean_feat

# Preprocessing: Divide by standard deviation. This ensures that each feature
# has roughly the same scale.
std_feat = np.std(X_train_feats, axis=0, keepdims=True)
X_train_feats /= std_feat
X_val_feats /= std_feat
X_test_feats /= std_feat
# Grid-search learning rate and regularization strength on the extracted
# features, keeping the SVM with the highest validation accuracy.
num_iters = 3000
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train_feats, y_train, lr, rs, num_iters)
        y_train_pred = svm.predict(X_train_feats)
        y_val_pred = svm.predict(X_val_feats)
        ytr_acc = np.mean(y_train == y_train_pred)
        yval_acc = np.mean(y_val == y_val_pred)
        results[(lr, rs)] = (ytr_acc, yval_acc)
        if yval_acc > best_val:
            best_val = yval_acc
            best_svm = svm

print('best validation accuracy achieved during cross-validation: %f' % best_val)
best validation accuracy achieved during cross-validation: 0.425000
1 2 3 4
# Evaluate your trained SVM on the test set
y_test_pred = best_svm.predict(X_test_feats)
test_accuracy = np.mean(y_test == y_test_pred)
print(test_accuracy)
# Grid-search hidden size, learning rate and regularization strength on the
# extracted features, keeping the network with the highest validation accuracy.
for hs in hidden_size:
    for lr in learning_rate:
        for re in reg:
            net = TwoLayerNet(input_dim, hs, num_classes)

            # Train the network
            stats = net.train(X_train_feats, y_train, X_val_feats, y_val,
                              num_iters=4000, batch_size=400,
                              learning_rate=lr, learning_rate_decay=0.98,
                              reg=re, verbose=False)

            # Predict on the validation set
            val_acc = (net.predict(X_val_feats) == y_val).mean()
            print('Validation accuracy: ', val_acc, ' hs = ', hs, ' lr = ', lr, ' re = ', re)
            if val_acc > best_val:
                best_val = val_acc
                best_net = net
Validation accuracy: 0.63 hs = 400 lr = 0.2 re = 5e-05 Validation accuracy: 0.587 hs = 400 lr = 0.2 re = 1e-05 Validation accuracy: 0.58 hs = 400 lr = 0.2 re = 5e-06 Validation accuracy: 0.583 hs = 400 lr = 0.25 re = 5e-05 Validation accuracy: 0.582 hs = 400 lr = 0.25 re = 1e-05 Validation accuracy: 0.587 hs = 400 lr = 0.25 re = 5e-06 Validation accuracy: 0.593 hs = 400 lr = 0.3 re = 5e-05 Validation accuracy: 0.576 hs = 400 lr = 0.3 re = 1e-05 Validation accuracy: 0.588 hs = 400 lr = 0.3 re = 5e-06