cv.solvePnP估计人脸姿态时,对输入数据不响应?

from __future__ import print_function

import os

import cv2

import sys

import numpy as np

import math

import json

# 3D model points.

MODEL_POINTS_68 = np.array([

[-53.30021857, -15.27635071, 60.43970857],

[-52.85842643, -1.810875, 59.18390214],

[-51.25744357, 11.67454214, 58.42188571],

[-48.626315, 24.63487214, 57.20655286],

[-43.58355, 36.41352214, 53.98811286],

[-35.42554, 46.354015, 48.35620143],

[-25.24848857, 54.04395214, 39.57465214],

[-13.64442429, 59.62245214, 28.96935357],

[ -0.80573857, 61.05416214, 25.76168714],

[ 11.60732, 59.40927786, 30.09232929],

[ 22.44441643, 53.81683, 40.08901643],

[ 32.24683929, 46.23280714, 48.37045 ],

[ 40.083355, 36.30976214, 53.26906357],

[ 45.11549929, 24.19315786, 55.59597786],

[ 47.84661071, 11.00381857, 56.59250643],

[ 49.47115857, -2.32260857, 57.92187643],

[ 50.10645643, -15.67903571, 60.06320429],

[-44.53299214, -29.24790071, 36.57320143],

[-37.510265, -35.96803857, 31.583005 ],

[-27.87970214, -38.27258143, 26.70722214],

[-18.03538357, -37.58489857, 21.67254214],

[ -9.18749714, -34.48015571, 17.84333 ],

[ 7.73560929, -34.98320857, 17.78346857],

[ 17.05676643, -38.20546071, 21.16640143],

[ 26.50843214, -38.83083714, 25.66079071],

[ 35.68873143, -36.34843286, 30.21249643],

[ 42.01933429, -29.83979571, 34.68843286],

[ -0.40917357, -24.128035, 16.83373214],

[ -0.30141, -16.12798571, 11.28371929],

[ -0.16755643, -8.23575357, 5.47430286],

[ 0., 0., -0. ],

[-10.55176571, 1.85589643, -14.38000214],

[ -5.12874214, 3.39399214, -16.81191714],

[ 0.39708571, 4.68778571, -18.53174857],

[ 5.90892786, 3.32928929, -16.92552929],

[ 10.86739357, 1.88789, -14.89868357],

[-32.89092143, -26.76529357, 5.027135 ],

[-26.91049143, -30.52179286, 2.15801214],

[-19.91704, -30.50822643, 0.96687786],

[-14.03447714, -26.25338714, -0.07934857],

[-20.194975, -25.09606643, -0.105195 ],

[-27.20172714, -24.94217357, 1.05472286],

[ 13.76133429, -26.45164714, -0.47553286],

[ 19.92442214, -30.95888929, 0.1769 ],

[ 26.74109214, -30.79344429, 1.21173929],

[ 32.26486071, -27.20465357, 3.49583071],

[ 27.28318143, -25.38001714, 0.202115 ],

[ 20.54642071, -25.34592071, -0.837625 ],

[-20.65447643, 20.43765429, -1.60022143],

[-12.52371, 15.83727643, -11.38166786],

[ -4.77470714, 13.59217929, -16.15096786],

[ 0.27214357, 14.80079857, -16.96316929],

[ 5.98245929, 13.59675714, -16.22999643],

[ 13.48329857, 15.99579214, -11.150485 ],

[ 20.56743714, 20.05708857, -2.29813786],

[ 13.61255286, 25.92732, -10.70571214],

[ 6.39741071, 28.31041071, -16.110175 ],

[ 0.272535, 28.85403357, -16.85116143],

[ -5.30635357, 28.454575, -16.00436143],

[-12.97188143, 26.19849929, -10.80136214],

[-17.41249286, 20.48412214, -3.41834571],

[ -4.92688071, 18.19712571, -14.92410143],

[ 0.24333071, 18.58162071, -15.87177071],

[ 6.03194429, 18.09014143, -15.01822857],

[ 17.48176643, 20.23072, -4.08055429],

[ 6.03511857, 21.85444, -14.76534929],

[ 0.14665857, 22.43481286, -15.64547857],

[ -5.14161857, 22.03205429, -14.52001571]], dtype=float)

class PoseEstimator:

"""Estimate head pose according to the facial landmarks"""

def __init__(self, img_size=(192, 256)):

self.size = img_size

self.landmarks_general_68 = MODEL_POINTS_68

self.focal_length = 962

# self.focal_length = self.size[1]

self.camera_center = (self.size[1] / 2, self.size[0] / 2)

self.camera_matrix = np.array(

[[self.focal_length, 0, self.camera_center[0]],

[0, self.focal_length, self.camera_center[1]],

[0, 0, 1]], dtype=float)

# Assuming no lens distortion

self.dist_coeefs = np.zeros((4, 1))

# Rotation vector and translation vector; 原代码给它设置了初值,不知道怎么来的?

"""self.r_vec = np.array([[0], [0], [0]], dtype=float)

self.t_vec = np.array([[0], [0], [0]], dtype=float)"""

self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])

self.t_vec = np.array([[-14.97821226], [-10.62040383], [-2053.03596872]])

def get_euler_angle(self, rotation_vector):

# calc rotation angles

theta = cv2.norm(rotation_vector, cv2.NORM_L2)

# transform to quaterniond

w = math.cos(theta / 2)

x = math.sin(theta / 2)*rotation_vector[0][0] / theta

y = math.sin(theta / 2)*rotation_vector[1][0] / theta

z = math.sin(theta / 2)*rotation_vector[2][0] / theta

# pitch (x-axis rotation)

t0 = 2.0 * (w*x + y*z)

t1 = 1.0 - 2.0*(x**2 + y**2)

pitch = math.atan2(t0, t1)

# yaw (y-axis rotation)

t2 = 2.0 * (w*y - z*x)

if t2 > 1.0:

t2 = 1.0

if t2 < -1.0:

t2 = -1.0

yaw = math.asin(t2)

# roll (z-axis rotation)

t3 = 2.0 * (w*z + x*y)

t4 = 1.0 - 2.0*(y**2 + z**2)

roll = math.atan2(t3, t4)

return pitch, yaw, roll

def solve_pose_by_68_points(self, image_points):

print(self.landmarks_general_68.shape)

print(image_points.shape)

success, rotation_vector, translation_vector = cv2.solvePnP(

self.landmarks_general_68,

image_points,

self.camera_matrix,

self.dist_coeefs,

rvec=self.r_vec,

tvec=self.t_vec,

flags=cv2.SOLVEPNP_ITERATIVE,

useExtrinsicGuess=True)

#flags=SOLVEPNP_EPNP)

print(success)

print(self.r_vec)

return rotation_vector, translation_vector

if __name__ == "__main__":

img = cv2.imread(IMAGE_PATH)

img = cv2.resize(img, (192, 256))

landmarks_load = json.load(open(LANDMARKS_ROOT, 'r'))

landmarks_98 = get_landmarks(IMAGE_PATH, landmarks_load)

landmarks_98 = np.array(landmarks_98, dtype=float)*4

landmarks_68 = get_68from98(landmarks_98)

print(landmarks_68)

pose_estimator = PoseEstimator(img_size=img.shape)

rotation_vector, translation_vector = pose_estimator.solve_pose_by_68_points(landmarks_68)

pitch, yaw, roll = pose_estimator.get_euler_angle(rotation_vector)

def _radian2angle(r):

return (r/math.pi)*180

Y, X, Z = map(_radian2angle, [pitch, yaw, roll])

line = 'Y:{:.1f}\nX:{:.1f}\nZ:{:.1f}'.format(Y,X,Z)

print('{},{}'.format(os.path.basename(IMAGE_PATH), line.replace('\n',',')))

print('yaw:{}, pitch:{}, roll:{}'.format(yaw, pitch, roll))

print('Translation vector:{}'.format(translation_vector))

参照https://blog.csdn.net/ChuiGeD...,进行人脸姿态估计,使用cv.solvePnP函数将68点标准3D人脸模型拟合图片上预标注的68点2D关键点。
我的代码修改了参考代码中r_vec与t_vec的初始值;将68个3D点进行了坐标轴方向变换(x轴左负右正、y轴上负下正、z轴近负远正),原点变换(鼻尖,点[30]),尺度缩放(近似图片中人脸的平均尺寸)。根据KDEF数据集的相机参数推算了焦距,其余相机内参维持了原始代码的值。
经测试发现,计算得到的角度完全不符合实际,且采用不同姿态的图片数据输入,获取的输出差别很细微。
而当我大幅度修改焦距、或者将r_vec、t_vec还原为参考代码的初值时,结果会发生显著变化且仍错误,且仍不对图片输入的变化起反应。
r_vec与t_vec根据函数描述似乎是用来存储输出结果的容器,不理解为什么它们初值的变化会影响输出结果?感觉是函数没有完成预期的计算效果?
人脸姿态估计这一步我也不需要做得很精确,但是对我项目的后续工作不可或缺,在网上查了一圈基本是用的这个cv函数模块。请问最好有使用过这个函数或者完成过类似姿态估计任务的大佬,分析一下问题或者分享一下经验?谢谢!!~~


后续我会贴出测试用的正面脸、侧面脸、3D点截取前两维的3D点数据,作为测试输入。这样的话,2D点的输入处理函数就可以忽略。
正面人脸:

LANDMARKS_68_S = np.array([

[43.0, 126.66666412353516],

[42.0, 133.33333587646484],

[46.0, 148.0],

[47.0, 162.66666412353516],

[50.0, 177.33333587646484],

[57.0, 189.3333282470703],

[68.0, 198.6666717529297],

[83.0, 211.1111094156901],

[97.0, 217.3333282470703],

[109.0, 210.66666412353516],

[120.0, 201.3333282470703],

[128.0, 192.0],

[138.0, 181.33333587646484],

[142.0, 168.0],

[146.0, 156.0],

[83.0, 211.1111094156901],

[143.0, 121.33333587646484],

[45.0, 121.33333587646484],

[54.0, 114.66666793823242],

[66.0, 116.0],

[73.0, 117.33333587646484],

[82.0, 118.66666412353516],

[106.0, 118.66666412353516],

[114.0, 118.66666793823242],

[121.0, 118.66666793823242],

[129.0, 112.0],

[137.0, 118.66666412353516],

[93.0, 129.3333282470703],

[93.0, 134.6666717529297],

[95.0, 150.6666717529297],

[93.0, 158.6666717529297],

[81.0, 161.3333282470703],

[89.0, 164.0],

[95.0, 169.3333282470703],

[103.0, 164.0],

[109.0, 158.6666717529297],

[57.0, 132.0],

[61.0, 124.0],

[75.0, 126.66666412353516],

[81.0, 129.3333282470703],

[75.0, 137.3333282470703],

[65.0, 137.3333282470703],

[107.0, 132.0],

[111.0, 126.66666412353516],

[121.0, 124.0],

[131.0, 124.0],

[127.0, 129.3333282470703],

[113.0, 132.0],

[69.0, 172.0],

[83.0, 169.3333282470703],

[91.0, 169.3333282470703],

[93.0, 169.3333282470703],

[101.0, 166.6666717529297],

[109.0, 169.3333282470703],

[121.0, 169.3333282470703],

[115.0, 182.6666717529297],

[105.0, 188.0],

[97.0, 188.0],

[85.0, 190.6666717529297],

[79.0, 182.6666717529297],

[73.0, 172.0],

[85.0, 172.0],

[93.0, 172.0],

[105.0, 174.6666717529297],

[117.0, 172.0],

[109.0, 182.6666717529297],

[97.0, 185.3333282470703],

[83.0, 185.3333282470703]], dtype=float)

右转45度人脸:

LANDMARKS_68_HR = np.array([

[65.0, 121.33333587646484],

[66.0, 133.33333587646484],

[67.0, 148.0],

[68.0, 172.0],

[73.0, 181.3333282470703],

[84.0, 196.0],

[99.0, 202.6666717529297],

[117.66666666666667, 211.1111094156901],

[135.0, 209.3333282470703],

[139.0, 201.33333587646484],

[143.0, 192.0],

[147.0, 178.6666717529297],

[150.0, 169.33333587646484],

[155.0, 158.66666412353516],

[159.0, 153.33333587646484],

[117.66666666666667, 211.1111094156901],

[157.0, 126.66666412353516],

[95.0, 121.33333587646484],

[103.0, 120.0],

[115.0, 116.0],

[122.0, 116.0],

[131.0, 121.33333206176758],

[145.0, 121.33333206176758],

[149.0, 122.66666793823242],

[154.0, 118.66666793823242],

[156.0, 117.33333587646484],

[159.0, 113.33333587646484],

[143.0, 132.0],

[143.0, 140.0],

[147.0, 153.3333282470703],

[149.0, 156.0],

[129.0, 161.3333282470703],

[133.0, 164.0],

[143.0, 164.0],

[145.0, 161.3333282470703],

[151.0, 161.3333282470703],

[101.0, 132.0],

[109.0, 124.0],

[123.0, 124.0],

[127.0, 132.0],

[121.0, 137.3333282470703],

[111.0, 134.6666717529297],

[145.0, 132.0],

[145.0, 126.66666412353516],

[153.0, 124.0],

[153.0, 124.0],

[153.0, 129.3333282470703],

[145.0, 129.3333282470703],

[111.0, 172.0],

[121.0, 169.3333282470703],

[139.0, 174.6666717529297],

[143.0, 172.0],

[143.0, 174.6666717529297],

[145.0, 172.0],

[145.0, 172.0],

[141.0, 182.6666717529297],

[143.0, 188.0],

[139.0, 190.6666717529297],

[127.0, 185.3333282470703],

[119.0, 185.3333282470703],

[111.0, 172.0],

[127.0, 172.0],

[137.0, 172.0],

[143.0, 172.0],

[141.0, 172.0],

[143.0, 180.0],

[139.0, 185.3333282470703],

[121.0, 180.0]], dtype=float)

标准人脸:

LANDMARKS_68 = np.array([

[42.69978143, 112.72364929],

[43.14157357, 126.189125],

[44.74255643, 139.67454214],

[47.373685, 152.63487214],

[52.41645, 164.41352214],

[60.57446, 174.354015],

[70.75151143, 182.04395214],

[82.35557571, 187.62245214],

[95.19426143, 189.05416214000002],

[107.60732, 187.40927786],

[118.44441643, 181.81683],

[128.24683929, 174.23280714],

[136.08335499999998, 164.30976214],

[141.11549929, 152.19315785999999],

[143.84661071, 139.00381857],

[145.47115857, 125.67739143],

[146.10645643, 112.32096429],

[51.46700786, 98.75209929],

[58.489735, 92.03196143],

[68.12029786, 89.72741857],

[77.96461643, 90.41510142999999],

[86.81250286, 93.51984429000001],

[103.73560929, 93.01679143],

[113.05676643, 89.79453929],

[122.50843214, 89.16916286],

[131.68873143, 91.65156714],

[138.01933429000002, 98.16020429],

[95.59082643, 103.871965],

[95.69859, 111.87201429],

[95.83244357, 119.76424643],

[96.0, 128.0],

[85.44823429, 129.85589643],

[90.87125786, 131.39399214],

[96.39708571, 132.68778571],

[101.90892786, 131.32928929],

[106.86739357, 129.88789],

[63.10907857, 101.23470643],

[69.08950856999999, 97.47820714],

[76.08296, 97.49177356999999],

[81.96552286, 101.74661286],

[75.805025, 102.90393356999999],

[68.79827286, 103.05782643],

[109.76133429000001, 101.54835286],

[115.92442214, 97.04111071],

[122.74109214, 97.20655571],

[128.26486071, 100.79534643],

[123.28318143, 102.61998286],

[116.54642071, 102.65407929],

[75.34552357, 148.43765429],

[83.47629, 143.83727643],

[91.22529286, 141.59217929],

[96.27214357, 142.80079857],

[101.98245929, 141.59675714],

[109.48329857, 143.99579214],

[116.56743714, 148.05708857],

[109.61255286, 153.92732],

[102.39741071, 156.31041070999999],

[96.272535, 156.85403357],

[90.69364643, 156.454575],

[83.02811857, 154.19849929],

[78.58750714, 148.48412214],

[91.07311929, 146.19712571],

[96.24333071, 146.58162071],

[102.03194429, 146.09014143],

[113.48176643, 148.23072],

[102.03511857, 149.85444],

[96.14665857, 150.43481286],

[90.85838143, 150.03205429]], dtype=float)


目前的估计效果,绿点为预标注的2D关键点;人脸图片仅为展示,不参与估计运算。(正面图上传失败,试了几次了;稍后再试试。。)
正面脸:Y:10.9 X:-2.7 Z:-179.2
cv.solvePnP估计人脸姿态时,对输入数据不响应?
cv.solvePnP估计人脸姿态时,对输入数据不响应?

以上是 cv.solvePnP估计人脸姿态时,对输入数据不响应? 的全部内容, 来源链接: utcz.com/a/161220.html

回到顶部