7

基于opencv的视频人脸识别(中文显示)以及 index 480 is out of bounds for axis 0 w...

 3 years ago
source link: https://blog.csdn.net/qq_43381783/article/details/111354147
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

@人脸识别代码和一些常见错误

基于opencv的视频人脸识别(中文显示)以及 index 480 is out of bounds for axis 0 with size 480错误的解决

参考了
https://github.com/niehen6174/face-recognition-and-put-in-chinese
上述代码中采用的是海康的摄像头,这里我用的是自己笔记本的摄像头。
直接使用会显示数组溢出,这里会有一些修改的部分
报错 index 480 is out of bounds for axis 0 with size 480的解决见代码后面的修改部分
直接运行下述代码应该不会报错

ft2(处理中文识别)

# -*- coding: utf-8 -*-
# http://blog.csdn.net/zizi7/article/details/70145150

'''
##################################################
# tools                                          #
#------------------------------------------------#
# draw chinese text using freetype on python2.x  #                  #
# 2017.4.12                                      #
##################################################
'''
                                                              
import numpy as np
import freetype
import copy
import pdb

class put_chinese_text(object):
    '''
    Draw text (Chinese or not) onto an image using a FreeType font face.
    '''

    def __init__(self, ttf):
        '''
        :param ttf: font file handed to freetype.Face
        '''
        self._face = freetype.Face(ttf)

    def draw_text(self, image, x, y, text, text_size, text_color):
        '''
        draw chinese(or not) text with ttf
        :param image:     image(numpy.ndarray) to draw text
        :param x:         x position (pixels) where the text starts
        :param y:         y position (pixels); the font ascender is added
                          to it to obtain the baseline used for drawing
        :param text:      the content; bytes are decoded as UTF-8
        :param text_size: text size
        :param text_color:text color, indexable as [0], [1], [2]
        :return:          a copy of image with the text drawn on it
        '''
        # set_char_size needs an integer; the size is scaled by 64 as in
        # FreeType's 26.6 fixed-point convention.
        self._face.set_char_size(int(text_size * 64))
        metrics = self._face.size
        ascender = metrics.ascender / 64.0
        ypos = int(ascender)

        if not isinstance(text, str):
            # Anything that is not already a text string (i.e. bytes) is
            # decoded so that iteration yields characters, not byte values.
            text = text.decode('utf-8')
        img = self.draw_string(image, x, y + ypos, text, text_color)
        return img

    def draw_string(self, img, x_pos, y_pos, text, color):
        '''
        draw string
        :param img:   image(numpy.ndarray); it is copied, not modified
        :param x_pos: text x-position on img
        :param y_pos: text y-position (baseline) on img
        :param text:  text (unicode)
        :param color: text color
        :return:      a copy of img with the text drawn on it
        '''
        prev_char = 0
        pen = freetype.Vector()
        # Pen positions are kept in 1/64 pixel units (multiply by 64).
        pen.x = int(x_pos) << 6
        pen.y = int(y_pos) << 6

        hscale = 1.0
        # 16.16 fixed-point 2x2 matrix applied to every glyph.
        # NOTE(review): the 0.2 and 1.1 coefficients presumably give a
        # slight slant and vertical stretch -- confirm against the
        # FreeType FT_Matrix field order.
        matrix = freetype.Matrix(int(hscale * 0x10000), int(0.2 * 0x10000),
                                 int(0.0 * 0x10000), int(1.1 * 0x10000))
        cur_pen = freetype.Vector()
        pen_translate = freetype.Vector()
        # The transform does not depend on the character, so set it once.
        self._face.set_transform(matrix, pen_translate)

        image = copy.deepcopy(img)
        for cur_char in text:
            self._face.load_char(cur_char)
            kerning = self._face.get_kerning(prev_char, cur_char)
            pen.x += kerning.x
            slot = self._face.glyph
            bitmap = slot.bitmap

            cur_pen.x = pen.x
            # bitmap_top is in pixels; move up from the baseline by it.
            cur_pen.y = pen.y - slot.bitmap_top * 64
            self.draw_ft_bitmap(image, bitmap, cur_pen, color)

            pen.x += slot.advance.x
            prev_char = cur_char

        return image

    def draw_ft_bitmap(self, img, bitmap, pen, color):
        '''
        draw each char
        :param img:    image(numpy.ndarray), modified in place
        :param bitmap: glyph bitmap (width, rows, pitch, buffer)
        :param pen:    top-left corner of the glyph in 1/64 pixel units
        :param color:  pen color e.g.(0,0,255) - red
        :return:       None

        Glyph pixels that fall outside the image are skipped instead of
        raising IndexError ("index 480 is out of bounds ...") or, for
        negative coordinates, wrapping around to the opposite edge.
        '''
        x_pos = pen.x >> 6
        y_pos = pen.y >> 6
        cols = bitmap.width
        rows = bitmap.rows
        # Rows in the buffer are `pitch` bytes apart, which may exceed width.
        pitch = bitmap.pitch

        # Read the buffer once; it is indexed many times below.
        glyph_pixels = bitmap.buffer

        img_rows, img_cols = img.shape[0], img.shape[1]
        # Restrict the glyph rows/columns to the part that lies on the image.
        first_row = max(0, -y_pos)
        last_row = min(rows, img_rows - y_pos)
        first_col = max(0, -x_pos)
        last_col = min(cols, img_cols - x_pos)

        for row in range(first_row, last_row):
            for col in range(first_col, last_col):
                if glyph_pixels[row * pitch + col] != 0:
                    pixel = img[y_pos + row][x_pos + col]
                    pixel[0] = color[0]
                    pixel[1] = color[1]
                    pixel[2] = color[2]


if __name__ == '__main__':
    # Manual smoke test: render a short Chinese string and display it.
    import cv2

    line = '你好'
    # 8-bit canvas so that the 0-255 colour values below are shown as-is.
    img = np.zeros([300, 300, 3], dtype=np.uint8)

    color_ = (0, 255, 0)  # Green
    pos = (3, 3)
    text_size = 24

    #ft = put_chinese_text('wqy-zenhei.ttc')
    ft = put_chinese_text('msyh.ttf')
    # draw_text takes x and y as separate arguments, so unpack the tuple.
    image = ft.draw_text(img, pos[0], pos[1], line, text_size, color_)

    cv2.imshow('diplay', image)
    cv2.waitKey(0)

修改的部分是 ft2 代码的第44行,也就是下面这一行,数组越界的问题就出在这里。

img = self.draw_string(image,x,y+ypos, text, text_color)

face_mobilevideo实现人脸识别,按下S键可拍照并保存。

# -*- coding: utf-8 -*-
# 摄像头头像识别
import face_recognition
import cv2
import ft2
from PIL import Image, ImageDraw, ImageFont
import numpy as np

def _load_face_encoding(image_path):
    """Return the encoding of the first face found in a reference photo.

    :param image_path: path of the image file to load
    :return:           the first entry of face_recognition.face_encodings
    :raises ValueError: if no face is detected in the image
    """
    image = face_recognition.load_image_file(image_path)
    encodings = face_recognition.face_encodings(image)
    if not encodings:
        # Without this check the script would stop with a bare IndexError
        # that does not say which photo is unusable.
        raise ValueError("no face found in reference image: %s" % image_path)
    return encodings[0]


# Reference photos of the people to recognise.
kxs_face_encoding = _load_face_encoding("kxs.jpg")
wsp_face_encoding = _load_face_encoding("wsp.jpg")
pr_face_encoding = _load_face_encoding("pr.jpg")

# Known face encodings; the order must match known_face_names below.
known_face_encodings = [
    kxs_face_encoding,
    wsp_face_encoding,
    pr_face_encoding
]

# Display names, one per encoding above.
# (ASCII alternatives used previously: "kxs", "wsp", "panrui".)
known_face_names = [
    "匡",
    "王",
    "潘"
]

# Open the camera only after the reference photos loaded successfully.
cam = cv2.VideoCapture(0)

# Results of the most recent detection pass, reused on skipped frames.
face_locations = []
face_encodings = []
face_names = []
# Toggled every frame so that detection runs on every other frame only.
process_this_frame = True


# Load the font once instead of once per detected face per frame.
ft = ft2.put_chinese_text('msyh.ttf')

# Detection runs on a frame shrunk by this factor; the resulting face
# boxes are multiplied by the same factor to map them back.
DETECTION_DOWNSCALE = 3

while cam.isOpened():
    # Grab one frame from the camera.
    ret, frame = cam.read()
    if not ret:
        # No frame could be read; stop the loop.
        break

    # Poll the keyboard once per frame (this also lets the window refresh).
    k = cv2.waitKey(1) & 0xFF
    if k == 27:
        # Esc quits.
        break
    elif k == ord("s"):
        # "s" saves the current, not yet annotated, frame and quits.
        # The frame already read is used rather than reading a second one,
        # which would skip a frame and could fail independently.
        cv2.imwrite("image1.jpg", frame)
        print("ok")
        break

    # Shrink the frame: a smaller image means less work for detection.
    small_frame = cv2.resize(frame, (0, 0),
                             fx=1.0 / DETECTION_DOWNSCALE,
                             fy=1.0 / DETECTION_DOWNSCALE)

    # OpenCV frames are BGR while face_recognition expects RGB.  cvtColor
    # returns a new contiguous array, unlike a [:, :, ::-1] view.
    rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

    # Only process every other frame of video to save time
    if process_this_frame:
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

        face_names = []
        for face_encoding in face_encodings:
            # A tolerance that is too low fails to recognise known faces;
            # too high confuses different people.  The library default is 0.6.
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding, tolerance=0.48)
            name = "Unknown"

            # If a match was found in known_face_encodings, just use the first one.
            if True in matches:
                first_match_index = matches.index(True)
                name = known_face_names[first_match_index]

            face_names.append(name)

    process_this_frame = not process_this_frame

    # Draw a box and a name label for every face found.
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Scale the face locations back up to full-frame coordinates.
        top *= DETECTION_DOWNSCALE
        right *= DETECTION_DOWNSCALE
        bottom *= DETECTION_DOWNSCALE
        left *= DETECTION_DOWNSCALE

        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 3)
        print(name)
        # Label just below the box, slightly inset from its left edge.
        xpos = left + 10
        ypos = bottom
        frame = ft.draw_text(frame, xpos, ypos, name, 20, (255, 255, 255))

    cv2.imshow('monitor', frame)

cam.release()
cv2.destroyAllWindows()

修改的部分是120行左右的

        xpos=left+10
        ypos=bottom
        frame = ft.draw_text(frame,xpos,ypos,name, 20, (255, 255, 255))

只输出英文(把上面的输出文字改为下面的代码即可)

cv2.putText(frame, name, (left + 6, bottom - 6), 000, 0.8, (255, 255, 255), 1)  # 这是不输入汉字时可以用的代码
在这里插入图片描述
**index 480 is out of bounds for axis 0 with size 480**
就按我上面的修改就不会出错了。

代码文件下载

https://github.com/horsein/-python_face_recognition–/tree/main


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK