准确率99%,分类错误 - 三元组网络

我试图按照 FaceNet 论文中所述的方式训练三元组网络。

我在验证集上这样计算准确率:统计批次中正距离(锚点 - 正样本)小于负距离(锚点 - 负样本)的三元组数量,再除以该批次中三元组的总数。

我获得了很好的结果:准确率达到 99%。但是当我用模型生成的嵌入对图像进行分类时(取一张未知图像,用欧几里得距离将其与其他图像进行比较),只有 20% 的结果是正确的。

我哪里做错了?

下面你可以找到我的详细实现。


三元组生成

在生成三元组之前,我用 dlib 对训练集和测试集(包括 CASIA 和 LFW)都做了对齐和裁剪,使每张人脸的主要部位(眼睛、鼻子、嘴唇)的位置几乎相同。

为了生成三元组,我随机选择一个包含 40 张或更多图像的 CASIA 文件夹,然后选择 40 个锚点,每个锚点配一张对应的正样本图像(随机选取,但与锚点不同)。然后,我为每个"锚点 - 正样本"对随机选择一个负样本。


三元组损失

这是我的三元组损失函数:

def triplet_loss(d_pos, d_neg, margin=0.2):
    """Return the batch-mean hinge triplet loss.

    Computes ``mean(max(0, margin + d_pos - d_neg))``.

    Args:
        d_pos: Anchor-to-positive distances (presumably one value per
            triplet; confirm against the caller).
        d_neg: Anchor-to-negative distances, same layout as ``d_pos``.
        margin: Amount by which ``d_neg`` must exceed ``d_pos`` before a
            triplet stops contributing to the loss. Defaults to 0.2, the
            value that used to be hard-coded in the body.

    Returns:
        The mean of the per-triplet hinge terms.
    """
    # Debug output kept from the original implementation.
    print("d_pos " + str(d_pos))
    print("d_neg " + str(d_neg))

    return tf.reduce_mean(tf.maximum(0., margin + d_pos - d_neg))

下面是我的正距离(锚点与正样本之间)和负距离(锚点与负样本之间)。

**model1** = embeddings generated for the anchor image 

**model2** = embeddings generated for the positive image

**model3** = embeddings generated for the negative image

变量 cost 是我在每一步计算的损失。

def _squared_l2(first, second):
    """Sum the squared element-wise differences of two tensors along axis 1."""
    return tf.reduce_sum(tf.square(first - second), 1)


def _root_with_epsilon(squared):
    """Take the square root after adding 1e-10 to the argument."""
    return tf.sqrt(squared + 1e-10)


# Training tensors: squared distances feed the loss; their square roots are
# the values later handed to compute_accuracy.
d_pos_triplet = _squared_l2(model1, model2)
d_neg_triplet = _squared_l2(model1, model3)
d_pos_triplet_acc = _root_with_epsilon(d_pos_triplet)
d_neg_triplet_acc = _root_with_epsilon(d_neg_triplet)

# Same quantities built from the *_test embeddings.
d_pos_triplet_test = _squared_l2(model1_test, model2_test)
d_neg_triplet_test = _squared_l2(model1_test, model3_test)
d_pos_triplet_acc_test = _root_with_epsilon(d_pos_triplet_test)
d_neg_triplet_acc_test = _root_with_epsilon(d_neg_triplet_test)

# Losses are computed on the squared distances, not on the square roots.
cost = triplet_loss(d_pos_triplet, d_neg_triplet)
cost_test = triplet_loss(d_pos_triplet_test, d_neg_triplet_test)

然后我逐个取出三元组并检查其损失是否为正——因为损失为 0 意味着网络学不到任何东西(正如 FaceNet 论文所说,我必须选择半困难(semi-hard)三元组)。

# Training-step fragment: for every triplet of the current batch, make sure
# its loss is positive (fetching replacement triplets until it is), then run
# the accumulation op on it and update the running loss/accuracy totals.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below cannot be determined from the text alone; confirm against
# the real script. `i`, `epoch`, `batch_size`, `avg_loss`, `avg_acc` and
# `contor_i` are defined outside this fragment.

# NOTE(review): this call reads `s` and `e` before they are assigned below and
# is repeated verbatim after the assignments; it looks like a paste duplicate.
# Confirm and remove.
input1,input2, input3, anchor_folder_helper, anchor_photo_helper, positive_photo_helper = training.next_batch_casia(s,e) #generate complet random 

# Bounds passed to next_batch_casia for batch number `i`.
s = i * batch_size

e = (i+1) *batch_size

input1,input2, input3, anchor_folder_helper, anchor_photo_helper, positive_photo_helper = training.next_batch_casia(s,e) #generate complet random

# Index of the triplet currently being processed.
lly = 0;

'''counter which helps me generate the same number of triplets each batch'''

while lly < len(input1):

# One-element slices: a single anchor / positive / negative.
input_lly1 = input1[lly:lly+1]

input_lly2 = input2[lly:lly+1]

input_lly3 = input3[lly:lly+1]

# Evaluate the loss for this single triplet.
loss_value = sess.run([cost], feed_dict={x_anchor:input_lly1, x_positive:input_lly2, x_negative:input_lly3})

# Keep requesting a different triplet until the loss becomes positive.
# NOTE(review): nothing here bounds the number of retries.
while(loss_value[0]<=0):

''' While the generated triplet has loss 0 (which means dpos - dneg + margin < 0) I keep generating triplets. I stop when I manage to generate a semi-hard triplet. '''

input_lly1,input_lly2, input_lly3, anchor_folder_helper, anchor_photo_helper, positive_photo_helper = training.cauta_hard_negative(anchor_folder_helper, anchor_photo_helper, positive_photo_helper)

loss_value = sess.run([cost], feed_dict={x_anchor:input_lly1, x_positive:input_lly2, x_negative:input_lly3})

if (loss_value[0] > 0):

# Run accum_ops on the triplet and fetch the loss, both distances and the
# three embeddings in the same session call.
_, loss_value, distance1_acc, distance2_acc, m1_acc, m2_acc, m3_acc = sess.run([accum_ops, cost, d_pos_triplet_acc, d_neg_triplet_acc, model1, model2, model3], feed_dict={x_anchor:input_lly1, x_positive:input_lly2, x_negative:input_lly3})

tr_acc = compute_accuracy(distance1_acc, distance2_acc)

# Drop into the debugger when the accuracy is NaN (skipped during epoch 0).
if math.isnan(tr_acc) and epoch != 0:

print('tr_acc %0.2f' % tr_acc)

pdb.set_trace()

# Update the running totals and the processed-triplet counter.
avg_loss += loss_value

avg_acc +=tr_acc*100

contor_i = contor_i + 1

lly = lly + 1

这是我的模型——注意,当我使用 L2 归一化时,准确率显著下降(也许是我用错了):

def siamese_convnet(x, normalize=False):
    """Build the embedding network and return a 128-dimensional output.

    The input is reshaped to ``[-1, 160, 160, 1]`` and passed through five
    stages of ReLU convolutions, each followed by ``max_pool``. The result is
    flattened to ``5 * 5 * 512`` features and fed through two ReLU
    fully-connected layers (2048 and 1024 units) and a linear 128-unit output
    layer.

    Variable names and creation order match the previous hand-written
    version (all weights first, then all biases).

    Args:
        x: Input batch; it must be reshapeable to ``[-1, 160, 160, 1]``.
        normalize: When true, L2-normalise every embedding along axis 1
            (one unit-length vector per example). The previously
            commented-out call normalised along axis 0, i.e. across the
            batch, which scales each feature column instead of each
            embedding. Defaults to ``False`` so existing callers get the
            same unnormalised output as before.

    Returns:
        Tensor with 128 columns, one row per input example.
    """
    # Layer tables as (name suffix, weight shape); each inner tuple is one
    # stage that ends with a max-pool.
    conv_stages = (
        (('conv1_1', [3, 3, 1, 64]),
         ('conv1_2', [3, 3, 64, 64])),
        (('conv2_1', [3, 3, 64, 128]),
         ('conv2_2', [3, 3, 128, 128])),
        (('conv3_1', [3, 3, 128, 256]),
         ('conv3_2', [3, 3, 256, 256]),
         ('conv3_3', [3, 3, 256, 256])),
        (('conv4_1', [3, 3, 256, 512]),
         ('conv4_2', [3, 3, 512, 512]),
         ('conv4_3', [1, 1, 512, 512])),
        (('conv5_1', [3, 3, 512, 512]),
         ('conv5_2', [3, 3, 512, 512]),
         ('conv5_3', [1, 1, 512, 512])),
    )
    fc_layers = (
        ('fc_1', [5 * 5 * 512, 2048]),
        ('fc_2', [2048, 1024]),
    )
    hidden_layers = [layer for stage in conv_stages for layer in stage]
    hidden_layers.extend(fc_layers)

    def make_weight(name, shape):
        # Xavier-initialised weight variable.
        return tf.get_variable(
            name=name,
            initializer=tf.contrib.layers.xavier_initializer(),
            shape=shape)

    def make_bias(name, size):
        # Bias variable initialised to the constant 0.01.
        return tf.get_variable(
            name=name, initializer=tf.constant(0.01, shape=[size]))

    # Create every weight before any bias, as the original code did.
    weights = {}
    for layer_name, shape in hidden_layers:
        weights[layer_name] = make_weight('w_' + layer_name, shape)
    w_out = make_weight('w_out', [1024, 128])

    biases = {}
    for layer_name, shape in hidden_layers:
        # The bias length equals the last dimension of the layer's weights.
        biases[layer_name] = make_bias('bias_' + layer_name, shape[-1])
    # The output-layer bias variable is named 'out'; the name is kept as is.
    b_out = make_bias('out', 128)

    net = tf.reshape(x, [-1, 160, 160, 1])

    for stage in conv_stages:
        for layer_name, _ in stage:
            net = tf.nn.relu(
                conv2d(net, weights[layer_name]) + biases[layer_name])
        net = max_pool(net)

    # NOTE(review): the 5*5*512 size assumes each of the five max_pool calls
    # halves the spatial resolution (160 -> 5); confirm against max_pool.
    net = tf.reshape(net, [-1, 5 * 5 * 512])

    for layer_name, _ in fc_layers:
        net = tf.nn.relu(
            tf.matmul(net, weights[layer_name]) + biases[layer_name])

    output = tf.matmul(net, w_out) + b_out

    if normalize:
        # Axis 1 is the embedding dimension: normalise each row on its own.
        output = tf.nn.l2_normalize(output, 1)

    return output

以与框架无关的方式描述我的模型:

conv 3x3 (1, 64) 

conv 3x3 (64,64)

max_pooling

conv 3x3 (64, 128)

conv 3x3 (128, 128)

max_pooling

conv 3x3 (128, 256)

conv 3x3 (256, 256)

conv 3x3 (256, 256)

max_pooling

conv 3x3 (256, 512)

conv 3x3 (512, 512)

conv 1x1 (512, 512)

max_pooling

conv 3x3 (512, 512)

conv 3x3 (512, 512)

conv 1x1 (512, 512)

max_pooling

fully_connected(2048)

fully_connected(1024)

output(128)

回答:

你的 L2 归一化是按特征维度(feature-wise)进行的,而它应该按样本(example-wise)进行。

以上是 准确率99%,分类错误 - 三元组网络 的全部内容, 来源链接: utcz.com/qa/263518.html

回到顶部