Face detection with the camera


How can I do real-time face detection, just as the built-in "Camera" app does?


I noticed that AVCaptureStillImageOutput is deprecated after 10.0, so I use AVCapturePhotoOutput instead. However, I found that the image I save for face detection doesn't work very well. Any ideas?

Update

After trying the approach @Shravya Boggarapu mentioned, I currently use AVCaptureMetadataOutput to detect the face, instead of CIFaceDetector. It works as expected. However, when I try to draw the bounds of the face, they seem misplaced. Any ideas?


let metaDataOutput = AVCaptureMetadataOutput()

    captureSession.sessionPreset = AVCaptureSessionPresetPhoto
    let backCamera = AVCaptureDevice.defaultDevice(withDeviceType: .builtInWideAngleCamera, mediaType: AVMediaTypeVideo, position: .back)
    do {
        let input = try AVCaptureDeviceInput(device: backCamera)

        if (captureSession.canAddInput(input)) {
            captureSession.addInput(input)

            // MetadataOutput instead
            if(captureSession.canAddOutput(metaDataOutput)) {
                captureSession.addOutput(metaDataOutput)

                metaDataOutput.setMetadataObjectsDelegate(self, queue: DispatchQueue.main)
                metaDataOutput.metadataObjectTypes = [AVMetadataObjectTypeFace]

                previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
                previewLayer?.frame = cameraView.bounds
                previewLayer?.videoGravity = AVLayerVideoGravityResizeAspectFill

                cameraView.layer.addSublayer(previewLayer!)
                captureSession.startRunning()
            }

        }

    } catch {
        print(error.localizedDescription)
    }

And

extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
    if findFaceControl {
        findFaceControl = false
        for metadataObject in metadataObjects {
            if (metadataObject as AnyObject).type == AVMetadataObjectTypeFace {
                print("")
                print(metadataObject)
                let bounds = (metadataObject as! AVMetadataFaceObject).bounds
                print("origin x: \(bounds.origin.x)")
                print("origin y: \(bounds.origin.y)")
                print("size width: \(bounds.size.width)")
                print("size height: \(bounds.size.height)")
                print("cameraView width: \(self.cameraView.frame.width)")
                print("cameraView height: \(self.cameraView.frame.height)")
                var face = CGRect()
                face.origin.x = bounds.origin.x * self.cameraView.frame.width
                face.origin.y = bounds.origin.y * self.cameraView.frame.height
                face.size.width = bounds.size.width * self.cameraView.frame.width
                face.size.height = bounds.size.height * self.cameraView.frame.height
                print(face)

                showBounds(at: face)
            }
        }
    }

}
}

Original

See it on GitHub

var captureSession = AVCaptureSession()
var photoOutput = AVCapturePhotoOutput()
var previewLayer: AVCaptureVideoPreviewLayer?    

override func viewWillAppear(_ animated: Bool) {
    super.viewWillAppear(true)

    captureSession.sessionPreset = AVCaptureSessionPresetHigh

    let backCamera = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo)
    do {
        let input = try AVCaptureDeviceInput(device: backCamera)

        if (captureSession.canAddInput(input)) {
            captureSession.addInput(input)

            if(captureSession.canAddOutput(photoOutput)){
                captureSession.addOutput(photoOutput)
                captureSession.startRunning()

                previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
                previewLayer?.videoGravity = AVLayerVideoGravityResizeAspectFill
                previewLayer?.frame = cameraView.bounds

                cameraView.layer.addSublayer(previewLayer!)
            }
        }

    } catch {
        print(error.localizedDescription)
    }

}

func captureImage() {
    let settings = AVCapturePhotoSettings()
    let previewPixelType = settings.availablePreviewPhotoPixelFormatTypes.first!
    let previewFormat = [kCVPixelBufferPixelFormatTypeKey as String: previewPixelType
                         ]
    settings.previewPhotoFormat = previewFormat
    photoOutput.capturePhoto(with: settings, delegate: self)

}



func capture(_ captureOutput: AVCapturePhotoOutput, didFinishProcessingPhotoSampleBuffer photoSampleBuffer: CMSampleBuffer?, previewPhotoSampleBuffer: CMSampleBuffer?, resolvedSettings: AVCaptureResolvedPhotoSettings, bracketSettings: AVCaptureBracketedStillImageSettings?, error: Error?) {
    if let error = error {
        print(error.localizedDescription)
    }
    // Not include previewPhotoSampleBuffer
    if let sampleBuffer = photoSampleBuffer,
        let dataImage = AVCapturePhotoOutput.jpegPhotoDataRepresentation(forJPEGSampleBuffer: sampleBuffer, previewPhotoSampleBuffer: nil) {
            self.imageView.image = UIImage(data: dataImage)
            self.imageView.isHidden = false
            self.previewLayer?.isHidden = true
            self.findFace(img: self.imageView.image!)
        }
}
findFace works with normal images. However, the image I capture via the camera does not work, or sometimes only one face is recognized.

Normal image


Captured image


func findFace(img: UIImage) {
    guard let faceImage = CIImage(image: img) else { return }
    let accuracy = [CIDetectorAccuracy: CIDetectorAccuracyHigh]
    let faceDetector = CIDetector(ofType: CIDetectorTypeFace, context: nil, options: accuracy)


    // For converting the Core Image Coordinates to UIView Coordinates
    let detectedImageSize = faceImage.extent.size
    var transform = CGAffineTransform(scaleX: 1, y: -1)
    transform = transform.translatedBy(x: 0, y: -detectedImageSize.height)


    if let faces = faceDetector?.features(in: faceImage, options: [CIDetectorSmile: true, CIDetectorEyeBlink: true]) {
        for face in faces as! [CIFaceFeature] {

            // Apply the transform to convert the coordinates
            var faceViewBounds =  face.bounds.applying(transform)
            // Calculate the actual position and size of the rectangle in the image view
            let viewSize = imageView.bounds.size
            let scale = min(viewSize.width / detectedImageSize.width,
                            viewSize.height / detectedImageSize.height)
            let offsetX = (viewSize.width - detectedImageSize.width * scale) / 2
            let offsetY = (viewSize.height - detectedImageSize.height * scale) / 2

            faceViewBounds = faceViewBounds.applying(CGAffineTransform(scaleX: scale, y: scale))
            print("faceBounds = \(faceViewBounds)")
            faceViewBounds.origin.x += offsetX
            faceViewBounds.origin.y += offsetY

            showBounds(at: faceViewBounds)
        }

        if faces.count != 0 {
            print("Number of faces: \(faces.count)")
        } else {
            print("No faces ")
        }
    }


}

func showBounds(at bounds: CGRect) {
    let indicator = UIView(frame: bounds)
    indicator.frame =  bounds
    indicator.layer.borderWidth = 3
    indicator.layer.borderColor = UIColor.red.cgColor
    indicator.backgroundColor = .clear

    self.imageView.addSubview(indicator)
    faceBoxes.append(indicator)

}

You should use CIDetector to detect the face. - aircraft
Use this sample here. It has live detection of rectangles/squares and QR codes, but you can easily adjust it to detect faces. You can use the sample to change overlays and all kinds of other stuff, it's very customizable. Hope this helps :D - Munib
You are filtering the results with options: [CIDetectorSmile: true, CIDetectorEyeBlink: true] to force detecting a smile and eye blink. Is that what you want? It may lead to poor results when detecting faces. - ricardopereira
I have set options: nil as you mentioned, but it still does not work as expected. - Willjay
I updated a solution with the Vision framework in iOS 11. It just works :D - Willjay
6 Answers

13

There are two ways to detect faces: CIFaceDetector and AVCaptureMetadataOutput. Depending on your requirements, choose what is relevant for you.

CIFaceDetector has more features: it gives you the location of the eyes and mouth, smile detection, and so on.

AVCaptureMetadataOutput, on the other hand, is computed on the video frames, and the detected faces are tracked, so there is no extra code for us to add. I find that, thanks to the tracking, faces are detected more reliably this way. The downside is that you only detect faces, with no information about the position of the eyes or mouth. Another advantage of this approach is that orientation issues are smaller, because you can use videoOrientation to determine the face orientation relative to the device orientation whenever it changes.
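Not part of the original answer, but as a minimal sketch of that last point (using the Swift 3-era constant names from the rest of this thread and the metaDataOutput from the question's setup code), the connection's orientation could be kept in sync like this:

// Hypothetical sketch: keep the connection's video orientation in sync with the UI,
// so that face metadata is reported relative to the current orientation.
if let connection = metaDataOutput.connection(withMediaType: AVMediaTypeVideo),
    connection.isVideoOrientationSupported {
    connection.videoOrientation = .portrait // or derive this from UIDevice.current.orientation
}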

In the app I worked on, the required format was YUV420, so using CIDetector (which works with RGB) in real time was not viable. Using AVCaptureMetadataOutput saved a lot of effort and performed more reliably thanks to the continuous tracking.

Once I had the bounding box for the faces, I coded extra features, such as skin detection, and applied them to the still image.

Note: when you capture a still image, the face box information is added along with the metadata, so there are no synchronization issues.

You can also use a combination of the two to get better results.

Explore and evaluate the pros and cons according to your application.


The face rectangle is relative to the image origin, so it may differ from what you see on screen. Use:

for (AVMetadataFaceObject *faceFeatures in metadataObjects) {
    CGRect face = faceFeatures.bounds;
    CGRect facePreviewBounds = CGRectMake(face.origin.y * previewLayerRect.size.width,
                               face.origin.x * previewLayerRect.size.height,
                               face.size.width * previewLayerRect.size.height,
                               face.size.height * previewLayerRect.size.width);

    /* Draw rectangle facePreviewBounds on screen */
}
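
My own rough Swift equivalent of that mapping (assuming it runs inside the metadata delegate callback, that previewLayerRect holds the preview layer's bounds, and reusing the showBounds(at:) helper from the question):

// Hypothetical Swift translation of the Objective-C snippet above
for case let faceObject as AVMetadataFaceObject in metadataObjects {
    let face = faceObject.bounds // normalized (0...1) coordinates, relative to the video frame
    let facePreviewBounds = CGRect(x: face.origin.y * previewLayerRect.size.width,
                                   y: face.origin.x * previewLayerRect.size.height,
                                   width: face.size.width * previewLayerRect.size.height,
                                   height: face.size.height * previewLayerRect.size.width)
    // Draw the rectangle facePreviewBounds on screen
    showBounds(at: facePreviewBounds)
}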

I set metadataObjectTypes to [AVMetadataObjectTypeFace], and didOutputMetadataObjects is called when a face is found. But how can I draw a rectangle on screen? - Willjay
In iOS7-day-by-day, it uses AVCaptureMetadataOutput to detect faces, and afterwards AVCaptureStillImageOutput with CIFaceDetector. - Willjay
When you eventually capture the image, print the metadata in the CMSampleBuffer (using CMSampleBufferGetSampleAttachmentsArray). My workspace uses an older API, but I believe the metadata does not change across versions. In any case, I will try to update it to the iOS 10 API and let you know. - Shravya Boggarapu
The screen's reference/origin is different from that of the video frame, that's all. - Shravya Boggarapu
By the way, a clearer explanation of the rectangle: y corresponds to the column => width, x corresponds to the row => height. The size is not the main thing, it should just be proportional to the face size. - Shravya Boggarapu

8

Thanks Wei! I forked your project and updated it, so it now also detects facial landmarks: https://github.com/wanderingstan/AppleFaceDetection - Stan James
The new Vision framework does not provide smile and eye-blink detection. - Oleksii Nezhyborets

3

A bit late, but here is a solution for the coordinates problem. There is a method you can call on the preview layer that converts a metadata object to your coordinate system: transformedMetadataObject(for: metadataObject).

guard let transformedObject = previewLayer.transformedMetadataObject(for: metadataObject) else {
     continue
}
let bounds = transformedObject.bounds
showBounds(at: bounds)

Source: https://developer.apple.com/documentation/avfoundation/avcapturevideopreviewlayer/1623501-transformedmetadataobjectformeta

By the way, in case you are using (or upgrading your project to) Swift 4, the AVCaptureMetadataOutputObjectsDelegate method has changed to:

func metadataOutput(_ output: AVCaptureMetadataOutput, didOutput metadataObjects: [AVMetadataObject], from connection: AVCaptureConnection)
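
Putting the two together, a minimal Swift 4 sketch (my own illustration, not part of the original answer; it assumes a previewLayer property and the showBounds(at:) helper from the question):

extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
    func metadataOutput(_ output: AVCaptureMetadataOutput,
                        didOutput metadataObjects: [AVMetadataObject],
                        from connection: AVCaptureConnection) {
        for metadataObject in metadataObjects where metadataObject.type == .face {
            // Let the preview layer convert from metadata coordinates to layer coordinates
            guard let transformedObject = previewLayer?.transformedMetadataObject(for: metadataObject) else { continue }
            showBounds(at: transformedObject.bounds)
        }
    }
}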

Kind regards.

2
extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
    func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
        if findFaceControl {
            findFaceControl = false
            // Convert each detected face into preview-layer coordinates
            let faces = metadataObjects
                .flatMap { $0 as? AVMetadataFaceObject }
                .flatMap { (face) -> CGRect? in
                    guard let localizedFace = previewLayer?.transformedMetadataObject(for: face) else { return nil }
                    return localizedFace.bounds
                }
            for face in faces {
                let temp = UIView(frame: face)
                temp.layer.borderColor = UIColor.white.cgColor
                temp.layer.borderWidth = 2.0
                view.addSubview(temp)
            }
        }
    }
}

Be sure to remove the views created by didOutputMetadataObjects.
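
A minimal cleanup sketch, assuming the faceBoxes array from the question is what holds those indicator views:

// Hypothetical cleanup: drop the boxes drawn for the previous frame before adding new ones
faceBoxes.forEach { $0.removeFromSuperview() }
faceBoxes.removeAll()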

Keeping track of the active face IDs is the best way to do this.

Also, when you are trying to find the location of faces in the preview layer, it is much easier to use the face metadata and a transform. I think CIDetector is junk; the metadata output uses hardware for face detection, which makes it really fast.


1
  1. Create the CaptureSession.
  2. For the AVCaptureVideoDataOutput, create the following settings (a fuller setup sketch for steps 1-2 follows after the code in this answer):

    output.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as AnyHashable: Int(kCMPixelFormat_32BGRA) ]

  3. When you receive the CMSampleBuffer, create an image:

DispatchQueue.main.async {
    let sampleImg = self.imageFromSampleBuffer(sampleBuffer: sampleBuffer)
    self.imageView.image = sampleImg
}
func imageFromSampleBuffer(sampleBuffer : CMSampleBuffer) -> UIImage
    {
        // Get a CMSampleBuffer's Core Video image buffer for the media data
        let  imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
        // Lock the base address of the pixel buffer
        CVPixelBufferLockBaseAddress(imageBuffer!, CVPixelBufferLockFlags.readOnly);


        // Get the base address of the pixel buffer
        let baseAddress = CVPixelBufferGetBaseAddress(imageBuffer!);

        // Get the number of bytes per row for the pixel buffer
        let bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer!);
        // Get the pixel buffer width and height
        let width = CVPixelBufferGetWidth(imageBuffer!);
        let height = CVPixelBufferGetHeight(imageBuffer!);

        // Create a device-dependent RGB color space
        let colorSpace = CGColorSpaceCreateDeviceRGB();

        // Create a bitmap graphics context with the sample buffer data
        var bitmapInfo: UInt32 = CGBitmapInfo.byteOrder32Little.rawValue
        bitmapInfo |= CGImageAlphaInfo.premultipliedFirst.rawValue & CGBitmapInfo.alphaInfoMask.rawValue
        //let bitmapInfo: UInt32 = CGBitmapInfo.alphaInfoMask.rawValue
        let context = CGContext.init(data: baseAddress, width: width, height: height, bitsPerComponent: 8, bytesPerRow: bytesPerRow, space: colorSpace, bitmapInfo: bitmapInfo)
        // Create a Quartz image from the pixel data in the bitmap graphics context
        let quartzImage = context?.makeImage();
        // Unlock the pixel buffer
        CVPixelBufferUnlockBaseAddress(imageBuffer!, CVPixelBufferLockFlags.readOnly);

        // Create an image object from the Quartz image
        let image = UIImage.init(cgImage: quartzImage!);

        return (image);
    }
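
For completeness, here is a minimal sketch of steps 1-2 (my own illustration, not part of the original answer). It assumes the code lives inside the view controller, which conforms to AVCaptureVideoDataOutputSampleBufferDelegate, and it uses the Swift 3-era constant names used elsewhere in this thread:

// Hypothetical wiring for steps 1-2: session, camera input, and a BGRA video data output
let captureSession = AVCaptureSession()
captureSession.sessionPreset = AVCaptureSessionPresetHigh

if let camera = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo),
    let input = try? AVCaptureDeviceInput(device: camera),
    captureSession.canAddInput(input) {
    captureSession.addInput(input)
}

let output = AVCaptureVideoDataOutput()
// BGRA so the CGContext in step 3 can read the pixel buffer directly
output.videoSettings = [kCVPixelBufferPixelFormatTypeKey as AnyHashable: Int(kCMPixelFormat_32BGRA)]
output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "videoQueue"))
if captureSession.canAddOutput(output) {
    captureSession.addOutput(output)
}
captureSession.startRunning()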

0

Looking at your code, I detected two things that could lead to wrong or poor face detection.

  1. One of them is the face detector feature options: you are filtering the results with [CIDetectorSmile: true, CIDetectorEyeBlink: true]. Try setting it to nil: faceDetector?.features(in: faceImage, options: nil)
  2. My other guess is the orientation of the resulting image. I noticed you use the AVCapturePhotoOutput.jpegPhotoDataRepresentation method to generate the source image for detection, and by default the system generates that image with a specific orientation (for example Left/LandscapeLeft). So you can tell the face detector to take that into account by using the CIDetectorImageOrientation key.
CIDetectorImageOrientation: the value for this key is an integer from 1..8, such as those found in kCGImagePropertyOrientation. If present, detection will be done based on that orientation, but the coordinates of the returned features will still be based on those of the image.

Try setting it like this: faceDetector?.features(in: faceImage, options: [CIDetectorImageOrientation: 8 /* left, bottom */])
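
If you would rather derive that value from the image instead of hard-coding 8, a small helper along these lines might work (my own sketch; the mapping follows the EXIF values listed under kCGImagePropertyOrientation):

// Hypothetical helper: map UIImageOrientation to the EXIF orientation values (1...8)
// expected by CIDetectorImageOrientation
func exifOrientation(for orientation: UIImageOrientation) -> Int {
    switch orientation {
    case .up:            return 1
    case .upMirrored:    return 2
    case .down:          return 3
    case .downMirrored:  return 4
    case .leftMirrored:  return 5
    case .right:         return 6
    case .rightMirrored: return 7
    case .left:          return 8
    }
}

// Usage:
// let options = [CIDetectorImageOrientation: exifOrientation(for: img.imageOrientation)]
// faceDetector?.features(in: faceImage, options: options)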


I think [CIDetectorSmile: true, CIDetectorEyeBlink: true] is not a filter. It tells the detector to spend more time so it can return the specified information, so it actually extends the results. - Oleksii Nezhyborets
