AISP-PL · folkien · May 2, 2025 · May 2, 2025 · May 2, 2025 · May 2, 2025
diff --git a/.gitattributes b/.gitattributes
@@ -1 +1,5 @@
 *.weights filter=lfs diff=lfs merge=lfs -text
+zoo/face-quality-assessment.onnx filter=lfs diff=lfs merge=lfs -text
+zoo/yolov8-lite-s.onnx filter=lfs diff=lfs merge=lfs -text
+zoo/yolov8-lite-t.onnx filter=lfs diff=lfs merge=lfs -text
+zoo/yolov8n-face.onnx filter=lfs diff=lfs merge=lfs -text
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -5,15 +5,14 @@
 	// List of extensions which should be recommended for users of this workspace.
 	"recommendations": [
 		"ms-python.python",
-		"ms-python.black-formatter",
-		"ms-python.flake8",
-		"ms-python.isort",
+		"charliermarsh.ruff",
 		"ms-python.mypy-type-checker",
 		"tamasfe.even-better-toml"
 
 	],
 	// List of extensions recommended by VS Code that should not be recommended for users of this workspace.
 	"unwantedRecommendations": [
-
+		"ms-python.isort",
+		"ms-python.black-formatter",
 	]
-}
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,9 +1,10 @@
 {
     "[python]": {
-        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.defaultFormatter": "charliermarsh.ruff",
         "editor.formatOnSave": true,
         "editor.codeActionsOnSave": {
-            "source.organizeImports": "explicit"
+            "source.organizeImports": "always",
+            "source.fixAll.ruff": "explicit"
         },
     },
     "[toml]": {
@@ -19,7 +20,9 @@
     "cSpell.enabled": false,
     "python.testing.pytestArgs": [
         "tests",
+        "-m not manual"
     ],
+    "github.copilot.nextEditSuggestions.enabled": false,
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
     "flake8.importStrategy": "fromEnvironment",
@@ -46,4 +49,4 @@
             "**/aisp-albumentations/**": true,
     },
     "cmake.ignoreCMakeListsMissing": true,
-}
+}
diff --git a/README.md b/README.md
@@ -1,11 +1,10 @@
-# Template : How to start and customize?
+# Model-Face - AISP repository for face detection and recognition
 
-- [ ] Create new repository from this template
-- [ ] Inside pyproject.toml rename `package_name`
-- [ ] Rename aisp_template directory to `package_name`
-- [ ] Update `README.md`
+The detector is used to blur or pixelate faces in images; the recognizer is used for face recognition and verification.
 
-# Template directory structure
+![Face detection and recognition](docs/title.png)
+
+# Directory structure
 
 - package_name/ - Insert package code here
 - tests/ - Insert unit tests here

diff --git a/docs/title.png b/docs/title.png
diff --git a/images/1.jpg b/images/1.jpg
diff --git a/aisp_template/__init__.py → model_face/__init__.py b/aisp_template/__init__.py → model_face/__init__.py
diff --git a/model_face/detector/yolov8_face_detector.py b/model_face/detector/yolov8_face_detector.py
@@ -0,0 +1,210 @@
+import math
+
+import cv2
+import numpy as np
+import supervision as sv  # type: ignore
+
+
+class YOLOv8FaceDetection:
+    """
+    YOLOv8 face detection model executed through OpenCV's DNN module.
+
+    The network input is fixed at 640x640. The outputs of the detection heads
+    (strides 8, 16 and 32) are decoded into face boxes, confidences and
+    landmarks (five points, each stored as x, y, score).
+    """
+
+    def __init__(self, path, conf_thres=0.2, iou_thres=0.5, padding: int = 10):
+        """
+        Initialize YOLOv8 face detection model.
+
+        Arguments:
+        ----------
+        path : str
+            Path of the model file handed to ``cv2.dnn.readNet``.
+        conf_thres : float
+            Minimum confidence a detection needs to be kept.
+        iou_thres : float
+            Overlap threshold used by non-maximum suppression.
+        padding : int
+            Pixels added to the width and height of every detected box.
+
+        """
+        self.conf_threshold = conf_thres
+        self.iou_threshold = iou_thres
+        self.class_names = ["face"]
+        self.num_classes = len(self.class_names)
+        # Initialize model
+        self.net = cv2.dnn.readNet(path)
+        self.input_height = 640
+        self.input_width = 640
+        self.reg_max = 16
+        self.padding = padding
+
+        # Bin indices used to turn each box-side distribution into its expected value
+        self.project = np.arange(self.reg_max)
+        self.strides = (8, 16, 32)
+        # Feature map size (height, width) of every detection head
+        self.feats_hw = [
+            (
+                math.ceil(self.input_height / stride),
+                math.ceil(self.input_width / stride),
+            )
+            for stride in self.strides
+        ]
+        self.anchors = self.make_anchors(self.feats_hw)
+
+    def make_anchors(self, feats_hw, grid_cell_offset=0.5):
+        """
+        Generate anchor points for every stride.
+
+        Arguments:
+        ----------
+        feats_hw : list[tuple[int, int]]
+            (height, width) of the feature map of each stride, in the order of ``self.strides``.
+        grid_cell_offset : float
+            Offset added to every grid index (0.5 puts the anchor in the cell center).
+
+        Returns:
+        --------
+        dict[int, np.ndarray]
+            Maps stride to an (h * w, 2) array of (x, y) anchors in feature-map units.
+
+        """
+        anchor_points = {}
+        for stride, (h, w) in zip(self.strides, feats_hw):
+            x = np.arange(0, w) + grid_cell_offset  # shift x
+            y = np.arange(0, h) + grid_cell_offset  # shift y
+            sx, sy = np.meshgrid(x, y)
+            anchor_points[stride] = np.stack((sx, sy), axis=-1).reshape(-1, 2)
+
+        return anchor_points
+
+    def softmax(self, x, axis=1):
+        """
+        Softmax along ``axis``.
+
+        The per-axis maximum is subtracted before exponentiation; this leaves
+        the result unchanged but prevents overflow for large inputs.
+        """
+        x_exp = np.exp(x - np.max(x, axis=axis, keepdims=True))
+        return x_exp / np.sum(x_exp, axis=axis, keepdims=True)
+
+    def resize_image(self, srcimg, keep_ratio=True):
+        """
+        Resize image to the network input size, padding with black borders when the aspect ratio is kept.
+
+        Arguments:
+        ----------
+        srcimg : np.ndarray
+            Image to resize.
+        keep_ratio : bool
+            Keep the aspect ratio and pad the shorter side instead of stretching.
+
+        Returns:
+        --------
+        tuple
+            (img, newh, neww, top, left) : resized image, height and width of
+            the image content inside it, and the size of the top and left borders.
+
+        """
+        # Height first: the original initialisation had the two values swapped,
+        # which only went unnoticed because the input is square.
+        top, left, newh, neww = 0, 0, self.input_height, self.input_width
+        src_h, src_w = srcimg.shape[:2]
+
+        if not keep_ratio or src_h == src_w:
+            img = cv2.resize(
+                srcimg,
+                (self.input_width, self.input_height),
+                interpolation=cv2.INTER_AREA,
+            )
+            return img, newh, neww, top, left
+
+        hw_scale = src_h / src_w
+        if hw_scale > 1:
+            # Portrait image : full height, borders on the left and right.
+            # max(1, ...) keeps extreme aspect ratios from producing a zero-sized resize.
+            newh, neww = self.input_height, max(1, int(self.input_width / hw_scale))
+            img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
+            left = int((self.input_width - neww) * 0.5)
+            img = cv2.copyMakeBorder(
+                img,
+                0,
+                0,
+                left,
+                self.input_width - neww - left,
+                cv2.BORDER_CONSTANT,
+                value=(0, 0, 0),
+            )
+        else:
+            # Landscape image : full width, borders on the top and bottom.
+            newh, neww = max(1, int(self.input_height * hw_scale)), self.input_width
+            img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
+            top = int((self.input_height - newh) * 0.5)
+            img = cv2.copyMakeBorder(
+                img,
+                top,
+                self.input_height - newh - top,
+                0,
+                0,
+                cv2.BORDER_CONSTANT,
+                value=(0, 0, 0),
+            )
+        return img, newh, neww, top, left
+
+    def detect(self, srcimg: np.ndarray) -> sv.Detections:
+        """
+        Detect faces in the image.
+
+        Arguments:
+        ----------
+        srcimg : np.ndarray
+            Input image in BGR format.
+
+        Returns:
+        --------
+        detections : sv.Detections
+            Boxes (xyxy, integer pixels of the source image), class ids and
+            confidences. ``data["landmarks"]`` holds 15 values per detection
+            (x, y, score for five points). All fields are empty when nothing
+            is detected.
+
+        """
+        input_img, newh, neww, padh, padw = self.resize_image(cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB))
+        scale_h, scale_w = srcimg.shape[0] / newh, srcimg.shape[1] / neww
+        input_img = input_img.astype(np.float32) / 255.0
+
+        # Perform inference on the image
+        blob = cv2.dnn.blobFromImage(input_img)
+        self.net.setInput(blob)
+        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
+
+        # Decode the raw outputs back to source image coordinates
+        det_xywh, det_conf, det_classid, landmarks = self.post_process(outputs, scale_h, scale_w, padh, padw)
+
+        # Padding : Increase width/height by +padding in px
+        # NOTE(review): x/y are not shifted, so the box grows only to the right
+        # and bottom - confirm whether centered padding was intended.
+        det_xywh[:, 2:] += self.padding
+
+        # Convert to xyxy format
+        det_xyxy = sv.xywh_to_xyxy(det_xywh)
+
+        return sv.Detections(
+            xyxy=det_xyxy.astype(int),
+            class_id=det_classid.astype(int),
+            confidence=det_conf.astype(float),
+            data={"landmarks": landmarks.astype(float)},
+        )
+
+    @staticmethod
+    def _no_detections():
+        """Return correctly shaped empty (boxes, confidences, class ids, landmarks)."""
+        return (
+            np.empty((0, 4), dtype=float),
+            np.empty((0,), dtype=float),
+            np.empty((0,), dtype=int),
+            np.empty((0, 15), dtype=float),
+        )
+
+    def post_process(self, preds, scale_h, scale_w, padh, padw):
+        """
+        Decode raw network outputs into filtered detections.
+
+        Arguments:
+        ----------
+        preds : sequence of np.ndarray
+            One array per detection head, indexed as (batch, channels, height,
+            width). Channels are read as ``reg_max * 4`` box values, then the
+            class scores, then 15 landmark values.
+        scale_h, scale_w : float
+            Factors that map resized coordinates back to the source image.
+        padh, padw : int
+            Top and left border added by ``resize_image``.
+
+        Returns:
+        --------
+        tuple
+            (boxes in xywh, confidences, class ids, landmarks). Arrays with
+            zero rows, shaped (0, 4), (0,), (0,) and (0, 15), when nothing
+            passes the confidence threshold and non-maximum suppression.
+
+        """
+        num_kpt_values = 15  # x1,y1,score1, ..., x5,y5,score5
+        bboxes, scores, landmarks = [], [], []
+        for pred in preds:
+            stride = int(self.input_height / pred.shape[2])
+            anchors = self.anchors[stride]
+            pred = pred.transpose((0, 2, 3, 1))
+
+            box = pred[..., : self.reg_max * 4]
+            cls_logits = pred[..., self.reg_max * 4 : -num_kpt_values]
+            # One row per anchor, one column per class (a single column for the face-only model)
+            cls = (1 / (1 + np.exp(-cls_logits))).reshape((-1, cls_logits.shape[-1]))
+            kpts = pred[..., -num_kpt_values:].reshape((-1, num_kpt_values))
+
+            # Each box side is a distribution over reg_max bins; its expected value is the distance
+            tmp = box.reshape(-1, 4, self.reg_max)
+            bbox_pred = self.softmax(tmp, axis=-1)
+            bbox_pred = np.dot(bbox_pred, self.project).reshape((-1, 4))
+
+            bbox = (
+                self.distance2bbox(
+                    anchors,
+                    bbox_pred,
+                    max_shape=(self.input_height, self.input_width),
+                )
+                * stride
+            )
+            kpts[:, 0::3] = (kpts[:, 0::3] * 2.0 + (anchors[:, 0].reshape((-1, 1)) - 0.5)) * stride
+            kpts[:, 1::3] = (kpts[:, 1::3] * 2.0 + (anchors[:, 1].reshape((-1, 1)) - 0.5)) * stride
+            kpts[:, 2::3] = 1 / (1 + np.exp(-kpts[:, 2::3]))
+
+            # Remove the resize border, then scale back to the source image
+            bbox -= np.array([[padw, padh, padw, padh]])
+            bbox *= np.array([[scale_w, scale_h, scale_w, scale_h]])
+            kpts -= np.tile(np.array([padw, padh, 0]), 5).reshape((1, 15))
+            kpts *= np.tile(np.array([scale_w, scale_h, 1]), 5).reshape((1, 15))
+
+            bboxes.append(bbox)
+            scores.append(cls)
+            landmarks.append(kpts)
+
+        if not bboxes:
+            return self._no_detections()
+
+        bboxes = np.concatenate(bboxes, axis=0)
+        scores = np.concatenate(scores, axis=0)
+        landmarks = np.concatenate(landmarks, axis=0)
+
+        bboxes_wh = bboxes.copy()
+        bboxes_wh[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2]  # xyxy -> xywh
+        class_ids = np.argmax(scores, axis=1)
+        confidences = np.max(scores, axis=1)  # max class confidence
+
+        mask = confidences > self.conf_threshold
+        if not mask.any():
+            # Nothing to suppress; also avoids calling NMS with empty lists
+            return self._no_detections()
+
+        bboxes_wh = bboxes_wh[mask]
+        confidences = confidences[mask]
+        class_ids = class_ids[mask]
+        landmarks = landmarks[mask]
+
+        # NMSBoxes may hand back an ndarray or an empty tuple depending on the
+        # OpenCV version, so normalise before indexing.
+        indices = np.asarray(
+            cv2.dnn.NMSBoxes(
+                bboxes_wh.tolist(),
+                confidences.tolist(),
+                self.conf_threshold,
+                self.iou_threshold,
+            )
+        ).reshape(-1)
+        if len(indices) == 0:
+            return self._no_detections()
+
+        indices = indices.astype(int)
+        return bboxes_wh[indices], confidences[indices], class_ids[indices], landmarks[indices]
+
+    def distance2bbox(self, points, distance, max_shape=None):
+        """
+        Convert distance to bounding box coordinates.
+
+        Arguments:
+        ----------
+        points : np.ndarray
+            (n, 2) array of (x, y) points.
+        distance : np.ndarray
+            (n, 4) array of distances from each point to the left, top, right and bottom side.
+        max_shape : tuple, optional
+            (height, width) used to clip the coordinates; no clipping when None.
+
+        Returns:
+        --------
+        np.ndarray
+            (n, 4) array of boxes as (x1, y1, x2, y2).
+
+        """
+        x1 = points[:, 0] - distance[:, 0]
+        y1 = points[:, 1] - distance[:, 1]
+        x2 = points[:, 0] + distance[:, 2]
+        y2 = points[:, 1] + distance[:, 3]
+        if max_shape is not None:
+            x1 = np.clip(x1, 0, max_shape[1])
+            y1 = np.clip(y1, 0, max_shape[0])
+            x2 = np.clip(x2, 0, max_shape[1])
+            y2 = np.clip(y2, 0, max_shape[0])
+
+        return np.stack([x1, y1, x2, y2], axis=-1)
diff --git a/model_face/helpers/transformations.py b/model_face/helpers/transformations.py
@@ -0,0 +1,50 @@
+import cv2
+import numpy as np
+
+
+def blur_box(image: np.ndarray, box: tuple[int, int, int, int], size: int = 10) -> np.ndarray:
+    """
+    Apply a Gaussian blur to a specified rectangular region (box) within the image.
+
+    The box is clipped to the image bounds first; a box that is empty after
+    clipping leaves the image untouched. The image is modified in place.
+
+    Parameters:
+    ----------------
+        image (np.ndarray): The input image to modify.
+        box (tuple[int, int, int, int]): The rectangular region to blur, defined as (x1, y1, x2, y2).
+        size (int): The maximum kernel size for the Gaussian blur. It is limited to half of the
+            shorter box side, raised to at least 1 and bumped to the next odd value when even.
+
+    Returns:
+    ----------------
+        np.ndarray: The image with the specified region blurred.
+    """
+    img_h, img_w = image.shape[:2]
+    x1, y1, x2, y2 = (int(v) for v in box)
+
+    # Clip to the image : negative coordinates would otherwise wrap around in the slice
+    x1, x2 = max(0, min(x1, img_w)), max(0, min(x2, img_w))
+    y1, y2 = max(0, min(y1, img_h)), max(0, min(y2, img_h))
+    if x2 <= x1 or y2 <= y1:
+        return image
+
+    size = max(1, min((x2 - x1) // 2, (y2 - y1) // 2, size))
+    if size % 2 == 0:
+        size += 1  # the kernel size has to be odd
+
+    # The third positional argument of GaussianBlur is sigmaX, not the border
+    # type. The previous call passed cv2.BORDER_ISOLATED there, which OpenCV
+    # read as sigmaX = 16; that blur strength is kept, now stated explicitly.
+    image[y1:y2, x1:x2] = cv2.GaussianBlur(image[y1:y2, x1:x2], (size, size), sigmaX=16.0)
+    return image
+
+
+def pixelate_box(image: np.ndarray, box: tuple[int, int, int, int], pixel_size: int = 8) -> np.ndarray:
+    """
+    Pixelate a specified rectangular region (box) within the image.
+
+    The box is clipped to the image bounds first; a box that is empty after
+    clipping leaves the image untouched. The image is modified in place.
+
+    Parameters:
+    ----------------
+        image (np.ndarray): The input image to modify.
+        box (tuple[int, int, int, int]): The rectangular region to pixelate, defined as (x1, y1, x2, y2).
+        pixel_size (int): The size of the pixels in the pixelated region. Values below 1 are treated as 1.
+
+    Returns:
+    ----------------
+        np.ndarray: The image with the specified region pixelated.
+    """
+    img_h, img_w = image.shape[:2]
+    x1, y1, x2, y2 = (int(v) for v in box)
+
+    # Clip to the image : negative coordinates would otherwise wrap around in the slice
+    x1, x2 = max(0, min(x1, img_w)), max(0, min(x2, img_w))
+    y1, y2 = max(0, min(y1, img_h)), max(0, min(y2, img_h))
+    if x2 <= x1 or y2 <= y1:
+        return image
+
+    pixel_size = max(1, int(pixel_size))  # guards against division by zero
+    roi = image[y1:y2, x1:x2]
+    roi_h, roi_w = roi.shape[:2]
+
+    # Shrink, then enlarge with nearest-neighbour sampling to get the blocky look
+    small = cv2.resize(
+        roi,
+        (max(1, roi_w // pixel_size), max(1, roi_h // pixel_size)),
+        interpolation=cv2.INTER_NEAREST,
+    )
+    image[y1:y2, x1:x2] = cv2.resize(small, (roi_w, roi_h), interpolation=cv2.INTER_NEAREST)
+    return image