
■SwiftUI + Realtime Face Detection

■Under verification
Verifying based on SwiftUI-Vision/Realtime-Face-Tracking/Realtime-Face-Tracking at main - SatoTakeshiX/SwiftUI-Vision
https://github.com/SatoTakeshiX/SwiftUI-Vision/tree/main/Realtime-Face-Tracking/Realtime-Face-Tracki...

Add the key "Privacy - Camera Usage Description" to Info.plist and set its value to 「顔を検出します。」 ("Detects faces."). Opening the file as source confirms that the following has been added inside the dict tag:

Info.plist
<key>NSCameraUsageDescription</key>
<string>顔を検出します。</string>
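The usage-description key only supplies the text shown in the system prompt; the prompt itself fires the first time the app touches the camera. As an aside (not part of the sample project), a minimal sketch of checking and requesting authorization explicitly before starting the session could look like this; requestCameraAccess is a hypothetical helper name:

import AVFoundation

/// Sketch: request camera access before starting the capture session.
/// `startSession` stands in for whatever actually starts the session.
func requestCameraAccess(then startSession: @escaping () -> Void) {
    switch AVCaptureDevice.authorizationStatus(for: .video) {
    case .authorized:
        startSession()
    case .notDetermined:
        // Triggers the system prompt with the NSCameraUsageDescription text.
        AVCaptureDevice.requestAccess(for: .video) { granted in
            if granted {
                DispatchQueue.main.async { startSession() }
            }
        }
    default:
        break // .denied / .restricted: no camera access
    }
}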
ContentView.swift
import SwiftUI

struct ContentView: View {
    @StateObject var viewModel = TrackingViewModel()
    var body: some View {
        ZStack {
            PreviewLayerView(previewLayer: viewModel.previewLayer,
                             detectedRect: viewModel.detectedRects,
                             pixelSize: viewModel.pixelSize)
        }
        .edgesIgnoringSafeArea(.all)
        .onAppear {
            viewModel.startSession()
        }
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}
PreviewLayerView.swift
import SwiftUI
import AVFoundation

/// With UIViewRepresentable, view.frame stays zero and the layer is never drawn.
/// With UIViewControllerRepresentable, viewController.view is given the device size.
struct PreviewLayerView: UIViewControllerRepresentable {
    typealias UIViewControllerType = UIViewController
    let previewLayer: AVCaptureVideoPreviewLayer
    let detectedRect: [CGRect]
    let pixelSize: CGSize

    func makeUIViewController(context: Context) -> UIViewController {
        let viewController = UIViewController()
        viewController.view.layer.addSublayer(previewLayer)
        previewLayer.frame = viewController.view.layer.frame
        return viewController
    }

    func updateUIViewController(_ uiViewController: UIViewController, context: Context) {
        previewLayer.frame = uiViewController.view.layer.frame
        drawFaceObservations(detectedRect)
    }

    func drawFaceObservations(_ detectedRects: [CGRect]) {
        // Remove the previously drawn sublayers.
        previewLayer.sublayers?.removeSubrange(1...)
        // Build a rect from the pixel size of the capture output.
        let captureDeviceBounds = CGRect(
            x: 0,
            y: 0,
            width: pixelSize.width,
            height: pixelSize.height
        )
        let overlayLayer = CALayer()
        overlayLayer.name = "DetectionOverlay"
        overlayLayer.bounds = captureDeviceBounds
        overlayLayer.position = CGPoint(
            x: captureDeviceBounds.midX,
            y: captureDeviceBounds.midY
        )
        print("overlay: before: \(overlayLayer.frame)")
        let videoPreviewRect = previewLayer.layerRectConverted(fromMetadataOutputRect: CGRect(x: 0, y: 0, width: 1, height: 1))
        let (rotation, scaleX, scaleY) = makeRotationAndScale(videoPreviewRect: videoPreviewRect, pixelSize: pixelSize)
        // Scale and mirror the image to ensure upright presentation.
        let affineTransform = CGAffineTransform(rotationAngle: radiansForDegrees(rotation)).scaledBy(x: scaleX, y: -scaleY)
        overlayLayer.setAffineTransform(affineTransform)
        overlayLayer.position = CGPoint(x: previewLayer.bounds.midX, y: previewLayer.bounds.midY)
        previewLayer.addSublayer(overlayLayer)
        print("overlay: after: \(overlayLayer.frame)")

        let layers = detectedRects.compactMap { detectedRect -> CALayer in
            // Vision bounding boxes are normalized with a bottom-left origin,
            // so flip the y axis and scale into the overlay's frame.
            let xMin = detectedRect.minX
            let yMax = detectedRect.maxY
            let detectedX = xMin * overlayLayer.frame.size.width + overlayLayer.frame.minX
            let detectedY = (1 - yMax) * overlayLayer.frame.size.height
            let detectedWidth = detectedRect.width * overlayLayer.frame.size.width
            let detectedHeight = detectedRect.height * overlayLayer.frame.size.height
            let layer = CALayer()
            layer.frame = CGRect(x: detectedX, y: detectedY, width: detectedWidth, height: detectedHeight)
            layer.borderWidth = 2.0
            layer.borderColor = UIColor.green.cgColor
            return layer
        }
        layers.forEach { self.previewLayer.addSublayer($0) }
    }

    private func radiansForDegrees(_ degrees: CGFloat) -> CGFloat {
        return CGFloat(Double(degrees) * Double.pi / 180.0)
    }

    private func makeRotationAndScale(videoPreviewRect: CGRect, pixelSize: CGSize) -> (rotation: CGFloat, scaleX: CGFloat, scaleY: CGFloat) {
        var rotation: CGFloat
        var scaleX: CGFloat
        var scaleY: CGFloat
        // Rotate the layer into screen orientation.
        switch UIDevice.current.orientation {
        case .portraitUpsideDown:
            rotation = 180
            scaleX = videoPreviewRect.width / pixelSize.width
            scaleY = videoPreviewRect.height / pixelSize.height
        case .landscapeLeft:
            rotation = 90
            scaleX = videoPreviewRect.height / pixelSize.width
            scaleY = scaleX
        case .landscapeRight:
            rotation = -90
            scaleX = videoPreviewRect.height / pixelSize.width
            scaleY = scaleX
        default:
            rotation = 0
            scaleX = videoPreviewRect.width / pixelSize.width
            scaleY = videoPreviewRect.height / pixelSize.height
        }
        return (rotation, scaleX, scaleY)
    }
}
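drawFaceObservations converts Vision's normalized, bottom-left-origin bounding boxes into layer coordinates by hand via the overlay transform. As an alternative worth comparing (my own assumption, not what the sample does for the face rects), AVCaptureVideoPreviewLayer can do the conversion itself through layerRectConverted(fromMetadataOutputRect:), provided the rect is first flipped into the top-left-origin metadata space:

import AVFoundation

/// Sketch: convert a Vision boundingBox (normalized, bottom-left origin)
/// into the preview layer's coordinate space.
func previewRect(for boundingBox: CGRect, in previewLayer: AVCaptureVideoPreviewLayer) -> CGRect {
    // Flip the y axis: metadata output rects use a top-left origin.
    let metadataRect = CGRect(x: boundingBox.minX,
                              y: 1 - boundingBox.maxY,
                              width: boundingBox.width,
                              height: boundingBox.height)
    // The preview layer accounts for videoGravity and orientation itself.
    return previewLayer.layerRectConverted(fromMetadataOutputRect: metadataRect)
}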
TrackingViewModel.swift
import Combine
import UIKit
import Vision
import AVKit

final class TrackingViewModel: ObservableObject {
    let captureSession = CaptureSession()
    let visionClient = VisionClient()
    var previewLayer: AVCaptureVideoPreviewLayer {
        return captureSession.previewLayer
    }
    @Published var detectedRects: [CGRect] = []
    @Published var pixelSize: CGSize = .zero
    private var cancellables: Set<AnyCancellable> = []

    init() {
        bind()
    }

    func bind() {
        captureSession.outputs
            .receive(on: RunLoop.main)
            .sink { [weak self] output in
                guard let self = self else { return }
                var requestHandlerOptions: [VNImageOption: AnyObject] = [:]
                // Pass the camera intrinsics through to the Vision request as an option.
                requestHandlerOptions[VNImageOption.cameraIntrinsics] = output.cameraIntrinsicData
                // Keep the image size for later coordinate conversion.
                self.pixelSize = output.pixelBufferSize
                self.visionClient.request(cvPixelBuffer: output.pixelBuffer,
                                          orientation: self.makeOrientation(with: UIDevice.current.orientation),
                                          options: requestHandlerOptions)
            }
            .store(in: &cancellables)

        visionClient.$visionObjectObservations
            .receive(on: RunLoop.main)
            .map { observations -> [CGRect] in
                return observations.map { $0.boundingBox }
            }
            .assign(to: &$detectedRects)
    }

    func startSession() {
        captureSession.startSession()
    }

    // The front camera delivers mirrored frames, so map the device orientation
    // to the corresponding mirrored CGImagePropertyOrientation.
    func makeOrientation(with deviceOrientation: UIDeviceOrientation) -> CGImagePropertyOrientation {
        switch deviceOrientation {
        case .portraitUpsideDown:
            return .rightMirrored
        case .landscapeLeft:
            return .downMirrored
        case .landscapeRight:
            return .upMirrored
        default:
            return .leftMirrored
        }
    }
}
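The second pipeline in bind() is what actually drives the UI: each new batch of observations is mapped to its boundingBox rects and piped straight into the @Published property. A stripped-down version of that pattern for reference, as an aside with a hypothetical RectStore type, not code from the sample:

import Combine
import CoreGraphics

final class RectStore: ObservableObject {
    @Published var rects: [CGRect] = []

    /// Sketch: feed a publisher of normalized rects into a @Published property.
    /// `assign(to:)` ties the subscription to the property's lifetime,
    /// so no AnyCancellable bookkeeping is needed.
    func bind(to publisher: AnyPublisher<[CGRect], Never>) {
        publisher
            .receive(on: RunLoop.main)
            .assign(to: &$rects)
    }
}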
CaptureSession.swift
import Foundation
import AVKit
import Combine
import SwiftUI

final class CaptureSession: NSObject, ObservableObject {
    struct Outputs {
        let cameraIntrinsicData: CFTypeRef
        let pixelBuffer: CVImageBuffer
        let pixelBufferSize: CGSize
    }

    private let captureSession = AVCaptureSession()
    private var captureDevice: AVCaptureDevice?
    private var videoDataOutput: AVCaptureVideoDataOutput?
    private var videoDataOutputQueue: DispatchQueue?
    private(set) var previewLayer = AVCaptureVideoPreviewLayer()
    var outputs = PassthroughSubject<Outputs, Never>()
    private var cancellable: AnyCancellable?

    override init() {
        super.init()
        setupCaptureSession()
    }

    // MARK: - Create capture session
    private func setupCaptureSession() {
        captureSession.sessionPreset = .photo
        // Use the front camera.
        if let availableDevice = AVCaptureDevice.DiscoverySession(
            deviceTypes: [.builtInWideAngleCamera],
            mediaType: .video,
            position: .front
        ).devices.first {
            captureDevice = availableDevice
            do {
                let captureDeviceInput = try AVCaptureDeviceInput(device: availableDevice)
                captureSession.addInput(captureDeviceInput)
            } catch {
                print(error.localizedDescription)
            }
        }
        makePreviewLayer(session: captureSession)
        // Only this part uses Combine. TODO: fix later.
        cancellable = NotificationCenter.default.publisher(for: UIDevice.orientationDidChangeNotification)
            .map { _ in () }
            .prepend(()) // initial run
            .sink { [previewLayer] in
                let interfaceOrientation = UIApplication.shared.windows.first?.windowScene?.interfaceOrientation
                if let interfaceOrientation = interfaceOrientation,
                   let orientation = AVCaptureVideoOrientation(interfaceOrientation: interfaceOrientation) {
                    previewLayer.connection?.videoOrientation = orientation
                }
            }
        makeDataOutput()
    }

    func startSession() {
        if captureSession.isRunning { return }
        captureSession.startRunning()
    }

    func stopSession() {
        if !captureSession.isRunning { return }
        captureSession.stopRunning()
    }

    private func makePreviewLayer(session: AVCaptureSession) {
        let previewLayer = AVCaptureVideoPreviewLayer(session: session)
        previewLayer.name = "CameraPreview"
        previewLayer.videoGravity = .resizeAspectFill
        previewLayer.backgroundColor = UIColor.green.cgColor
        //previewLayer.borderWidth = 2
        //previewLayer.borderColor = UIColor.black.cgColor
        self.previewLayer = previewLayer
    }

    private func makeDataOutput() {
        let videoDataOutput = AVCaptureVideoDataOutput()
        videoDataOutput.videoSettings = [
            (kCVPixelBufferPixelFormatTypeKey as String): kCVPixelFormatType_32BGRA
        ]
        // Discard frames that arrive late.
        videoDataOutput.alwaysDiscardsLateVideoFrames = true
        let videoDataOutputQueue = DispatchQueue(label: "com.Personal-Factory.Realtime-Face-Tracking")
        videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
        captureSession.beginConfiguration()
        if captureSession.canAddOutput(videoDataOutput) {
            captureSession.addOutput(videoDataOutput)
        }
        // Needed to use CMGetAttachment on the sample buffer.
        if let captureConnection = videoDataOutput.connection(with: .video) {
            if captureConnection.isCameraIntrinsicMatrixDeliverySupported {
                captureConnection.isCameraIntrinsicMatrixDeliveryEnabled = true
            }
        }
        self.videoDataOutput = videoDataOutput
        self.videoDataOutputQueue = videoDataOutputQueue
        captureSession.commitConfiguration()
    }
}

extension CaptureSession: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let cameraIntrinsicData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil) else {
            return
        }
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            print("Failed to obtain a CVPixelBuffer for the current output frame.")
            return
        }
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        self.outputs.send(.init(
            cameraIntrinsicData: cameraIntrinsicData,
            pixelBuffer: pixelBuffer,
            pixelBufferSize: CGSize(width: width, height: height)
        ))
    }
}

// MARK: - AVCaptureVideoOrientation
extension AVCaptureVideoOrientation: CustomDebugStringConvertible {
    public var debugDescription: String {
        switch self {
        case .portrait: return "portrait"
        case .portraitUpsideDown: return "portraitUpsideDown"
        case .landscapeRight: return "landscapeRight"
        case .landscapeLeft: return "landscapeLeft"
        @unknown default: return "unknown"
        }
    }

    public init?(deviceOrientation: UIDeviceOrientation) {
        switch deviceOrientation {
        case .portrait: self = .portrait
        case .portraitUpsideDown: self = .portraitUpsideDown
        case .landscapeLeft: self = .landscapeRight
        case .landscapeRight: self = .landscapeLeft
        case .faceUp, .faceDown, .unknown: return nil
        @unknown default: return nil
        }
    }

    public init?(interfaceOrientation: UIInterfaceOrientation) {
        switch interfaceOrientation {
        case .portrait: self = .portrait
        case .portraitUpsideDown: self = .portraitUpsideDown
        case .landscapeLeft: self = .landscapeLeft
        case .landscapeRight: self = .landscapeRight
        case .unknown: return nil
        @unknown default: return nil
        }
    }
}
VisionClient.swift
import Foundation
import Vision
import Combine

// Tracks faces via CVPixelBuffer.
final class VisionClient: NSObject, ObservableObject {
    enum State {
        case stop
        case tracking(trackingRequests: [VNTrackObjectRequest])
    }

    @Published var visionObjectObservations: [VNDetectedObjectObservation] = []
    @Published var state: State = .stop
    private var subscriber: Set<AnyCancellable> = []
    private lazy var sequenceRequestHandler = VNSequenceRequestHandler()

    func request(cvPixelBuffer pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation, options: [VNImageOption: Any] = [:]) {
        switch state {
        case .stop:
            initialRequest(cvPixelBuffer: pixelBuffer, orientation: orientation, options: options)
        case .tracking(let trackingRequests):
            guard !trackingRequests.isEmpty else {
                initialRequest(cvPixelBuffer: pixelBuffer, orientation: orientation, options: options)
                break
            }
            do {
                try sequenceRequestHandler.perform(trackingRequests, on: pixelBuffer, orientation: orientation)
            } catch {
                print(error.localizedDescription)
            }
            // Set up the next round of tracking.
            // After perform, each request's results property has been updated.
            let newTrackingRequests = trackingRequests.compactMap { request -> VNTrackObjectRequest? in
                guard let results = request.results,
                      let observation = results.first as? VNDetectedObjectObservation else {
                    return nil
                }
                if !request.isLastFrame {
                    if observation.confidence > 0.3 {
                        request.inputObservation = observation
                    } else {
                        request.isLastFrame = true
                    }
                    return request
                } else {
                    return nil
                }
            }
            state = .tracking(trackingRequests: newTrackingRequests)
            if newTrackingRequests.isEmpty {
                // Nothing left to track.
                self.visionObjectObservations = []
                return
            }
            newTrackingRequests.forEach { request in
                guard let result = request.results as? [VNDetectedObjectObservation] else { return }
                self.visionObjectObservations = result
            }
        }
    }

    // MARK: Performing Vision Requests
    private func prepareRequest(completion: @escaping (Result<[VNTrackObjectRequest], Error>) -> Void) -> VNDetectFaceRectanglesRequest {
        var requests = [VNTrackObjectRequest]()
        let faceRequest = VNDetectFaceRectanglesRequest(completionHandler: { (request, error) in
            if let error = error {
                completion(.failure(error))
                return
            }
            guard let faceDetectionRequest = request as? VNDetectFaceRectanglesRequest,
                  let results = faceDetectionRequest.results as? [VNFaceObservation] else {
                return
            }
            // Add the observations to the tracking list.
            for obs in results {
                let faceTrackingRequest = VNTrackObjectRequest(detectedObjectObservation: obs)
                requests.append(faceTrackingRequest)
            }
            completion(.success(requests))
        })
        return faceRequest
    }

    private func initialRequest(cvPixelBuffer pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation, options: [VNImageOption: Any] = [:]) {
        // No tracked object yet, so perform the initial detection.
        let imageRequestHandler = VNImageRequestHandler(
            cvPixelBuffer: pixelBuffer,
            orientation: orientation,
            options: options
        )
        do {
            let faceDetectionRequest = prepareRequest { [weak self] result in
                switch result {
                case .success(let trackingRequests):
                    self?.state = .tracking(trackingRequests: trackingRequests)
                case .failure(let error):
                    print("error: \(String(describing: error)).")
                }
            }
            try imageRequestHandler.perform([faceDetectionRequest])
        } catch let error as NSError {
            NSLog("Failed to perform FaceRectangleRequest: %@", error)
        }
    }
}
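The initial detection path boils down to VNDetectFaceRectanglesRequest plus VNImageRequestHandler. For quick experiments, a minimal self-contained version of just that step, run against a still CGImage instead of the camera's pixel buffer (my own sketch, not part of the sample), looks like this:

import Vision
import CoreGraphics

/// Sketch: one-shot face detection on a still image,
/// returning normalized bounding boxes (bottom-left origin).
func detectFaces(in image: CGImage) throws -> [CGRect] {
    let request = VNDetectFaceRectanglesRequest()
    let handler = VNImageRequestHandler(cgImage: image, options: [:])
    try handler.perform([request])
    let observations = request.results as? [VNFaceObservation] ?? []
    return observations.map { $0.boundingBox }
}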
The preview is built with UIViewControllerRepresentable rather than UIViewRepresentable. The stated reason is that sizing is awkward to handle with UIViewRepresentable, though it does not seem to be unusable; a sketch of that route follows below. The article below also explains this approach with a minimal sample, so I want to try it and compare the behavior:
SwiftUIでAVFundationを導入する【Video Capture偏】
https://blog.personal-factory.com/2020/06/14/introduce-avfundation-by-swiftui/
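For comparison, a UIViewRepresentable version is possible if sizing is delegated to a UIView subclass whose backing layer is the preview layer itself, so the layer always matches the view's bounds. This is my own assumption of how one might do it, not code from the article; PreviewUIView and PreviewView are hypothetical names:

import SwiftUI
import AVFoundation

/// A UIView whose backing layer is an AVCaptureVideoPreviewLayer,
/// so the layer automatically tracks the view's bounds.
final class PreviewUIView: UIView {
    override class var layerClass: AnyClass { AVCaptureVideoPreviewLayer.self }
    var previewLayer: AVCaptureVideoPreviewLayer { layer as! AVCaptureVideoPreviewLayer }
}

/// Sketch of the UIViewRepresentable route: sizing is handled by the
/// backing-layer trick above instead of a view controller.
struct PreviewView: UIViewRepresentable {
    let session: AVCaptureSession

    func makeUIView(context: Context) -> PreviewUIView {
        let view = PreviewUIView()
        view.previewLayer.session = session
        view.previewLayer.videoGravity = .resizeAspectFill
        return view
    }

    func updateUIView(_ uiView: PreviewUIView, context: Context) {}
}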
