-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathobject_detector.py
More file actions
150 lines (121 loc) · 5.74 KB
/
object_detector.py
File metadata and controls
150 lines (121 loc) · 5.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from ultralytics import YOLO
import cv2
import numpy as np
from typing import Dict, List, Optional
class ObjectDetector:
# Known average heights of common objects in meters
KNOWN_HEIGHTS = {
'person': 1.7,
'chair': 0.8,
'car': 1.5,
'truck': 2.5,
'bicycle': 1.0,
'motorcycle': 1.2,
'dog': 0.5,
'cat': 0.3,
}
def __init__(self, model_path: str = 'yolov8n.pt', frame_width: int = 640, frame_height: int = 320):
"""Initialize the YOLO model."""
self.model = YOLO(model_path, task="detect")
self.frame_width = frame_width
self.frame_height = frame_height
# Camera parameters (can be calibrated)
self.focal_length = frame_width # A reasonable default
self.sensor_height = frame_height
self.vertical_fov = 50 # Typical vertical FOV for webcams (degrees)
self.model.overrides['imgsz'] = 320
def _calculate_distance(self, box_height: float, object_class: str) -> Dict[str, float]:
"""
Calculate distance using multiple methods and return the most reliable estimate.
Args:
box_height: Height of bounding box in pixels
object_class: Class name of detected object
Returns:
Dictionary containing distance information
"""
# Method 1: Basic ratio method (current approach enhanced)
ratio = box_height / self.frame_height
basic_distance = "close" if ratio > 0.33 else "far" if ratio < 0.16 else "medium"
# Method 2: Known object size method (when available)
distance_meters = None
confidence = "low"
if object_class in self.KNOWN_HEIGHTS:
# Using the formula: Distance = (Known Height × Focal Length) / Pixel Height
known_height = self.KNOWN_HEIGHTS[object_class]
distance_meters = (known_height * self.focal_length) / box_height
# Adjust confidence based on object size and class
if box_height > 20: # Minimum size for reliable measurement
confidence = "high" if object_class in ['person', 'car', 'truck'] else "medium"
return {
"category": basic_distance,
"meters": distance_meters,
"confidence": confidence
}
def process_frame(self, frame: np.ndarray) -> Dict:
"""
Process a single frame and return detection results.
Args:
frame: numpy array of the image frame
Returns:
Dictionary containing detected objects and their details
"""
# Resize frame to specified dimensions
frame = cv2.resize(frame, (self.frame_width, self.frame_height))
# Process frame with YOLO
results = self.model(frame)[0]
detections = []
for box in results.boxes:
x1, y1, x2, y2 = box.xyxy[0].tolist()
confidence = float(box.conf[0])
class_id = int(box.cls[0])
class_name = results.names[class_id]
# Calculate position in frame (left, center, right)
center_x = (x1 + x2) / 2
position = "left" if center_x < self.frame_width/3 else "right" if center_x > 2*self.frame_width/3 else "center"
# Calculate distance using enhanced method
box_height = y2 - y1
distance_info = self._calculate_distance(box_height, class_name)
detection = {
"object": class_name,
"confidence": round(confidence, 2),
"position": position,
"distance": distance_info["category"],
"distance_meters": distance_info["meters"],
"distance_confidence": distance_info["confidence"],
"bbox": [round(x, 2) for x in [x1, y1, x2, y2]]
}
detections.append(detection)
return {
"detections": detections,
"frame_size": (self.frame_height, self.frame_width)
}
def draw_detections(self, frame: np.ndarray, detections: Dict) -> np.ndarray:
"""Draw bounding boxes and labels on the frame."""
# Resize frame if needed
frame = cv2.resize(frame, (self.frame_width, self.frame_height))
for det in detections["detections"]:
x1, y1, x2, y2 = map(int, det["bbox"])
# Color based on distance
color = (0, 255, 0) # Green for far
if det["distance"] == "close":
color = (0, 0, 255) # Red for close
elif det["distance"] == "medium":
color = (0, 165, 255) # Orange for medium
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
# Create detailed label with distance information
label_parts = [det['object']]
if det['distance_meters'] is not None:
label_parts.append(f"{det['distance_meters']:.1f}m")
label_parts.append(f"({det['confidence']:.2f})")
label = " ".join(label_parts)
cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
return frame
def calibrate_camera(self, known_distance: float, known_height: float, measured_pixels: float):
"""
Calibrate camera parameters using a known object.
Args:
known_distance: Real distance to object in meters
known_height: Real height of object in meters
measured_pixels: Height of object in pixels
"""
self.focal_length = (measured_pixels * known_distance) / known_height