opencv和mediapipe实现手势识别

本篇文章只是手势识别的一个demo,想要识别的精度更高,还需要添加其他的约束条件,这里只是根据每个手指关键点和手掌根部的距离来判断手指是伸展开还是弯曲的。关于mediapipe的简介,可以去看官网:Home – mediapipe,官网有现成的demo程序,直接拷贝应用就可以实现手掌21个关键点的识别,这21个关键点的分布如下:

opencv和mediapipe实现手势识别

而且,检测的实时性也非常的不错:

opencv和mediapipe实现手势识别

当然,mediapipe不止可以检测手势,面部检测,姿态检测都可以:

opencv和mediapipe实现手势识别

下面说下这个手势识别的demo的大体思路:

首先,要import必要的库和导入必要的函数方法:

import cv2 as cv
import numpy as np
import mediapipe as mp
from numpy import linalg

# Hand-detection solution: locates the 21 hand landmarks per detected hand.
mpHands = mp.solutions.hands
hands = mpHands.Hands()

# Drawing utilities for rendering the landmarks and their connections.
mpDraw = mp.solutions.drawing_utils
handLmsStyle = mpDraw.DrawingSpec(color=(0, 0, 255), thickness=int(5))   # landmark style (red in BGR)
handConStyle = mpDraw.DrawingSpec(color=(0, 255, 0), thickness=int(10))  # connection style (green in BGR)

其中,handLmsStyle和handConStyle分别是关键点和连接线的特征,包括颜色和关键点(连接线)的宽度。

如果画面中有手,就可以通过如下函数将关键点和连接线表示出来

if result.multi_hand_landmarks:
    # Both hands can be detected and drawn at the same time.
    for i, handLms in enumerate(result.multi_hand_landmarks):
        mpDraw.draw_landmarks(frame, handLms, mpHands.HAND_CONNECTIONS,
                              landmark_drawing_spec=handLmsStyle,
                              connection_drawing_spec=handConStyle)

opencv和mediapipe实现手势识别

有了这21个关键点,可以做的事情就太多了,比如控制电脑的音量,鼠标、键盘,如果有个完善的手势姿态库,还可以做比如手语识别等等。因为实际生活中,手的摆放不一定是正好手心面向摄像头的,所以约束条件越苛刻,精度就会越高,这里的做法就没有考虑这么多,就只是用手指不同姿态时的向量L2范数(就是向量的模,忘记了就看线性代数或者机器学习)不同,来粗略的检测,比如说食指,伸直的时候和弯曲的时候,指尖(点8)到手掌根部(点0)的向量模dist1肯定是大于点6到点0的向量模dist2的,如果食指弯曲的时候,则有dist1 &lt; dist2,食指、中指、无名指和小拇指的判断都是如此,仅大拇指是用点17代替点0,代码如下:

# Loop over the five fingers (k=0 thumb .. k=4 pinky).
for k in range (5):
    if k == 0:
        # Thumb: measure relative to landmark 17 instead of the wrist (landmark 0).
        figure_ = finger_stretch_detect(landmark[17],landmark[4*k+2],
                                        landmark[4*k+4])
    else:
        figure_ = finger_stretch_detect(landmark[0],landmark[4*k+2],
                                        landmark[4*k+4])

然后通过五个手指的状态,来判断当前的手势,我这里列举了一些,简单粗暴:

def detect_hands_gesture(result):
    """Map the five per-finger stretch flags (thumb..pinky, 1 = stretched) to a gesture name."""
    # Lookup table keyed by the (thumb, index, middle, ring, pinky) pattern.
    known_gestures = {
        (1, 0, 0, 0, 0): "good",
        (0, 1, 0, 0, 0): "one",
        (0, 0, 1, 0, 0): "please civilization in testing",
        (0, 1, 1, 0, 0): "two",
        (0, 1, 1, 1, 0): "three",
        (0, 1, 1, 1, 1): "four",
        (1, 1, 1, 1, 1): "five",
        (1, 0, 0, 0, 1): "six",
        (0, 0, 1, 1, 1): "OK",
        (0, 0, 0, 0, 0): "stone",
    }
    # Float flags (e.g. 1.0) hash/compare equal to the int keys, so lookup works
    # for the np.zeros(5) array the caller passes in.
    return known_gestures.get(tuple(result), "not in detect range...")

然后根据判断的结果输出即可,效果如下:

opencv和mediapipe实现手势识别

完整代码如下:

import cv2 as cv
import numpy as np
import mediapipe as mp
from numpy import linalg

# Camera device index passed to cv.VideoCapture (0 is usually the built-in webcam).
DEVICE_NUM = 0

# Finger-stretch detection.
# NOTE: the two lines above this function in the published article were bare
# (un-commented) Chinese text, which made the script a SyntaxError; they are
# restored here as proper comments.
def finger_stretch_detect(point1, point2, point3):
    """Return 1 if the finger is stretched, else 0.

    Per the call sites: point1 is the reference point (wrist landmark 0, or
    landmark 17 for the thumb), point2 is a mid-finger joint (landmark 4k+2)
    and point3 is the fingertip (landmark 4k+4). A finger counts as stretched
    when the fingertip is farther from the reference than the mid joint.
    """
    result = 0
    # L2 norm (Euclidean length) of each joint-to-reference vector.
    dist1 = np.linalg.norm((point2 - point1), ord=2)
    dist2 = np.linalg.norm((point3 - point1), ord=2)
    if dist2 > dist1:
        result = 1

    return result

# Gesture detection. (The line above this function in the published article was
# bare un-commented Chinese text — a SyntaxError — restored here as a comment.)
def detect_hands_gesture(result):
    """Return a gesture name for result = [thumb, index, middle, ring, pinky]
    stretch flags (1 = stretched, 0 = bent), or a fallback string when the
    pattern is not recognized."""
    if (result[0] == 1) and (result[1] == 0) and (result[2] == 0) and (result[3] == 0) and (result[4] == 0):
        gesture = "good"
    elif (result[0] == 0) and (result[1] == 1)and (result[2] == 0) and (result[3] == 0) and (result[4] == 0):
        gesture = "one"
    elif (result[0] == 0) and (result[1] == 0)and (result[2] == 1) and (result[3] == 0) and (result[4] == 0):
        gesture = "please civilization in testing"
    elif (result[0] == 0) and (result[1] == 1)and (result[2] == 1) and (result[3] == 0) and (result[4] == 0):
        gesture = "two"
    elif (result[0] == 0) and (result[1] == 1)and (result[2] == 1) and (result[3] == 1) and (result[4] == 0):
        gesture = "three"
    elif (result[0] == 0) and (result[1] == 1)and (result[2] == 1) and (result[3] == 1) and (result[4] == 1):
        gesture = "four"
    elif (result[0] == 1) and (result[1] == 1)and (result[2] == 1) and (result[3] == 1) and (result[4] == 1):
        gesture = "five"
    elif (result[0] == 1) and (result[1] == 0)and (result[2] == 0) and (result[3] == 0) and (result[4] == 1):
        gesture = "six"
    elif (result[0] == 0) and (result[1] == 0)and (result[2] == 1) and (result[3] == 1) and (result[4] == 1):
        gesture = "OK"
    elif(result[0] == 0) and (result[1] == 0) and (result[2] == 0) and (result[3] == 0) and (result[4] == 0):
        gesture = "stone"
    else:
        gesture = "not in detect range..."

    return gesture

def detect():
    """Capture video frames, detect hand landmarks with MediaPipe and overlay
    the recognized gesture label on each frame until 'q' is pressed."""
    # When using a USB camera, remember to adjust the capture-device index (DEVICE_NUM).
    cap = cv.VideoCapture(DEVICE_NUM)
    # Load the hand-detection solution.
    mpHands = mp.solutions.hands
    hands = mpHands.Hands()
    # Load the drawing utilities and set the shape/color of the hand landmarks
    # and of the connection lines between them.
    mpDraw = mp.solutions.drawing_utils
    handLmsStyle = mpDraw.DrawingSpec(color=(0, 0, 255), thickness=int(5))
    handConStyle = mpDraw.DrawingSpec(color=(0, 255, 0), thickness=int(10))

    figure = np.zeros(5)          # per-finger stretch flags (thumb..pinky)
    landmark = np.empty((21, 2))  # pixel coordinates of the 21 hand landmarks

    if not cap.isOpened():
        print("Can not open camera.")
        exit()

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Can not receive frame (stream end?). Exiting...")
            break

        # MediaPipe expects RGB input, so convert from OpenCV's BGR format here.
        frame_RGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        result = hands.process(frame_RGB)
        # Read the height and width of the video frame.
        frame_height = frame.shape[0]
        frame_width  = frame.shape[1]

        #print(result.multi_hand_landmarks)
        # If at least one hand was detected:
        if result.multi_hand_landmarks:
            # Draw the landmarks and connections for every detected hand.
            for i, handLms in enumerate(result.multi_hand_landmarks):
                mpDraw.draw_landmarks(frame,
                                      handLms,
                                      mpHands.HAND_CONNECTIONS,
                                      landmark_drawing_spec=handLmsStyle,
                                      connection_drawing_spec=handConStyle)

                # Convert the normalized landmark coordinates to pixel positions.
                for j, lm in enumerate(handLms.landmark):
                    xPos = int(lm.x * frame_width)
                    yPos = int(lm.y * frame_height)
                    landmark_ = [xPos, yPos]
                    landmark[j,:] = landmark_

                # Decide whether each finger is stretched by comparing the
                # fingertip and mid-joint distances to landmark 0 (the thumb
                # uses landmark 17 as its reference instead).
                for k in range (5):
                    if k == 0:
                        figure_ = finger_stretch_detect(landmark[17],landmark[4*k+2],landmark[4*k+4])
                    else:
                        figure_ = finger_stretch_detect(landmark[0],landmark[4*k+2],landmark[4*k+4])

                    figure[k] = figure_
                print(figure,'\n')

                gesture_result = detect_hands_gesture(figure)
                cv.putText(frame, f"{gesture_result}", (30, 60*(i+1)), cv.FONT_HERSHEY_COMPLEX, 2, (255 ,255, 0), 5)

        cv.imshow('frame', frame)
        if cv.waitKey(1) == ord('q'):
            break

    cap.release()
    cv.destroyAllWindows()

# Script entry point.
if __name__ == '__main__':
    detect()

我的公众号:

opencv和mediapipe实现手势识别
​​​​​​​

Original: https://blog.csdn.net/weixin_41747193/article/details/122117629
Author: 王三思
Title: opencv和mediapipe实现手势识别

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/638113/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球