From bceadd33cf2c772175cf3e8552c9be0eabf94b3a Mon Sep 17 00:00:00 2001 From: y-veys Date: Wed, 25 Mar 2026 18:07:46 -0400 Subject: [PATCH 1/8] Change YOLOWorld to use YOLOE-seg detector --- spot_tools/src/spot_skills/detection_utils.py | 4 ++-- spot_tools/src/spot_skills/grasp_utils.py | 5 ++--- spot_tools_ros/examples/test_spot_executor_ros.py | 7 +++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/spot_tools/src/spot_skills/detection_utils.py b/spot_tools/src/spot_skills/detection_utils.py index 392fb63..59e6259 100644 --- a/spot_tools/src/spot_skills/detection_utils.py +++ b/spot_tools/src/spot_skills/detection_utils.py @@ -2,7 +2,7 @@ import cv2 import numpy as np -from ultralytics import YOLOWorld +from ultralytics import YOLOE class Detector: @@ -18,7 +18,7 @@ def __init__(self, spot, yolo_world_path): if not yolo_world_path: raise ValueError("YOLOWorld model path must be provided.") - self.yolo_model = YOLOWorld(yolo_world_path) + self.yolo_model = YOLOE(yolo_world_path) custom_classes = ["", "bag", "cone", "pipe"] self.yolo_model.set_classes(custom_classes) print("Set classes for YOLOWorld model.") diff --git a/spot_tools/src/spot_skills/grasp_utils.py b/spot_tools/src/spot_skills/grasp_utils.py index aa31737..3376f51 100644 --- a/spot_tools/src/spot_skills/grasp_utils.py +++ b/spot_tools/src/spot_skills/grasp_utils.py @@ -26,7 +26,6 @@ from bosdyn.client.robot_command import RobotCommandBuilder, block_until_arm_arrives from spot_skills.arm_utils import ( - arm_to_drop, close_gripper, open_gripper, stow_arm, @@ -116,7 +115,7 @@ def object_place(spot, semantic_class="bag", position=None): time.sleep(0.25) # arm_to_carry(spot) # stow_arm(spot) - arm_to_drop(spot) + # arm_to_drop(spot) # odom_T_task = get_root_T_ground_body( # robot_state=spot.get_state(), root_frame_name=ODOM_FRAME_NAME @@ -153,7 +152,7 @@ def object_place(spot, semantic_class="bag", position=None): execute_recovery_action( spot, recover_arm=False, - relative_pose=math_helpers.SE2Pose(x=0.0, y=1.0, angle=0), + relative_pose=math_helpers.SE2Pose(x=0.0, y=-1.0, angle=0), ) time.sleep(1) return True diff --git a/spot_tools_ros/examples/test_spot_executor_ros.py b/spot_tools_ros/examples/test_spot_executor_ros.py index d6f54c9..9782c97 100644 --- a/spot_tools_ros/examples/test_spot_executor_ros.py +++ b/spot_tools_ros/examples/test_spot_executor_ros.py @@ -47,7 +47,7 @@ def __init__(self): # ) pick_cmd = Pick( - "hamilton/odom", "bag", np.array([5.0, 5, 0]), np.array([7.0, 7, 0]) + "hamilton/odom", "cone", np.array([5.0, 5, 0]), np.array([7.0, 7, 0]), "" ) path = np.array( @@ -60,12 +60,15 @@ def __init__(self): second_follow_cmd = Follow("hamilton/odom", path) place_cmd = Place( - "hamilton/odom", "bag", np.array([5.0, 5, 0]), np.array([7.0, 7, 0]) + "hamilton/odom", "cone", np.array([5.0, 5, 0]), np.array([7.0, 7, 0]), "" ) seq = ActionSequence( "id0", "spot", [follow_cmd, pick_cmd, second_follow_cmd, place_cmd] ) + # seq = ActionSequence( + # "id0", "spot", [follow_cmd, pick_cmd] + # ) publisher.publish(to_msg(seq)) viz_publisher.publish(to_viz_msg(seq, "planner_ns")) From 5da3d1fedfaa3b821e2b55ed9ff341f4a8f32a72 Mon Sep 17 00:00:00 2001 From: Jake Arkin Date: Fri, 27 Mar 2026 14:07:19 -0400 Subject: [PATCH 2/8] Update manipulation approval request process --- spot_tools/src/spot_skills/detection_utils.py | 38 ++++++---- spot_tools/src/spot_skills/grasp_utils.py | 46 +++++++------ .../src/spot_tools_ros/spot_executor_ros.py | 69 ++++++++++++++----- 3 files changed, 100 insertions(+), 53 deletions(-) diff --git a/spot_tools/src/spot_skills/detection_utils.py b/spot_tools/src/spot_skills/detection_utils.py index 59e6259..be19eb3 100644 --- a/spot_tools/src/spot_skills/detection_utils.py +++ b/spot_tools/src/spot_skills/detection_utils.py @@ -39,19 +39,25 @@ def set_up_detector(self, semantic_class): print(f"Updated recognized classes: {updated_classes}") def return_centroid(self, img_source, semantic_class, debug): - image, img = self.spot.get_image_RGB(view=img_source) + # Primary image + primary_image, primary_img = self.spot.get_image_RGB(view=img_source) + primary_xy = self._get_centroid(primary_img, semantic_class, rotate=0, debug=debug) - xy = self._get_centroid(img, semantic_class, rotate=0, debug=debug) - if xy is None: - print("Object not found in first image. Looking around!") - xy, image, img, image_source = self._look_for_object( - semantic_class, debug=debug - ) + # Always scan all other cameras + other_candidates, other_detection_index = self._look_for_object(semantic_class, debug=debug) - if xy is None: - print("Object not found near robot") + # Combine: primary first + candidates = [(primary_image, primary_img, primary_xy)] + other_candidates - return xy, image, img + if primary_xy is not None: + detection_index = 0 + elif other_detection_index is not None: + detection_index = other_detection_index + 1 # offset for prepended primary + else: + detection_index = None + print("Object not found in any camera") + + return detection_index, candidates def _get_centroid(self, img, semantic_class, rotate, debug): if rotate == 0: @@ -118,6 +124,9 @@ def _get_centroid(self, img, semantic_class, rotate, debug): def _look_for_object(self, semantic_class, debug): sources = self.spot.image_client.list_image_sources() + candidates = [] # list of (bosdyn_image, cv2_img, xy_or_None) + detection_index = None + for source in sources: if ( "depth" in source.name or source.name == "hand_image" @@ -143,11 +152,12 @@ def _look_for_object(self, semantic_class, debug): print("Found object centroid:", xy) if xy is None: print(f"Object not found in {image_source}.") - continue - else: - return xy, image, img, image_source - return None, None, None, None + candidates.append((image, img, xy)) + if xy is not None and detection_index is None: + detection_index = len(candidates) - 1 + + return candidates, detection_index class SemanticDetector(Detector): diff --git a/spot_tools/src/spot_skills/grasp_utils.py b/spot_tools/src/spot_skills/grasp_utils.py index 3376f51..cc13f4f 100644 --- a/spot_tools/src/spot_skills/grasp_utils.py +++ b/spot_tools/src/spot_skills/grasp_utils.py @@ -187,17 +187,18 @@ def object_grasp( # Set up the detector (e.g., for YOLOWorld, this may mean updating recognized classes) detector.set_up_detector(semantic_class) + candidates = None while attempts < 2 and not success: attempts += 1 if not user_input: # Try to get the centroid using the detector passed into the function. - xy, image, img = detector.return_centroid( + detection_index, candidates = detector.return_centroid( image_source, semantic_class, debug=debug ) # If the detector fails to return the centroid, then try again until max_attempts - if xy is None: + if detection_index is None: continue else: @@ -206,9 +207,11 @@ def object_grasp( else: image, img = spot.get_image_RGB(view=image_source) xy = get_user_grasp_input(spot, img) + candidates = [(image, img, xy)] + detection_index = 0 print("Found object centroid:", xy) - if xy is None: + if detection_index is None or candidates is None: if feedback is not None: feedback.print( "INFO", @@ -223,27 +226,28 @@ def object_grasp( ) time.sleep(1) return False - # execute_recovery_action(spot, recover_arm=True) - # spot.sit() - # raise Exception( - # "Failed to find an object in any cameras after 2 attempts. Please check the detector or user input." - # ) - # If xy is not None, then display the annotated image - else: - if feedback is not None: - annotated_img = copy(img) + image, _, xy = candidates[detection_index] - response = feedback.bounding_box_detection_feedback( - annotated_img, - xy[0], - xy[1], - semantic_class, - ) + # Display all candidate images in the approval panel + if feedback is not None: + annotated_imgs = [copy(img) for (_, img, _) in candidates] + + approved, updated_xy, selected_index = feedback.bounding_box_detection_feedback( + annotated_imgs, + detection_index, + xy[0] if xy else None, + xy[1] if xy else None, + semantic_class, + ) + + if approved is not None and not approved: + feedback.print("INFO", "User requested abort.") + return False - if response is not None and not response: - feedback.print("INFO", "User requested abort.") - return False + # Use selected camera image and pixel (panel always sends the correct selection) + image, _, _ = candidates[selected_index] + xy = updated_xy pick_vec = geometry_pb2.Vec2(x=xy[0], y=xy[1]) stow_arm(spot) diff --git a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py index 8b7dc0b..5981ea9 100755 --- a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py +++ b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py @@ -36,6 +36,8 @@ from spot_tools_ros.occupancy_grid_ros_updater import OccupancyGridROSUpdater from spot_tools_ros.utils import get_tf_pose, waypoints_to_path +from nlu_interface_rviz.msg import ManipulationApprovalRequest, ManipulationApprovalResponse + def load_inverse_semantic_id_map_from_label_space(fn): with open(fn, "r") as fo: @@ -77,7 +79,11 @@ def build_markers(pts, namespaces, frames, colors): class RosFeedbackCollector: def __init__(self, odom_frame: str, output_dir: str): self.pick_confirmation_event = threading.Event() - self.pick_confirmation_response = False + #self.pick_confirmation_response = False + + self.pick_confirmation_approved = False + self.pick_confirmation_xy = [0,0] + self.pick_confirmation_image_index = 0 self.break_out_of_waiting_loop = False self.odom_frame = odom_frame @@ -85,11 +91,13 @@ def __init__(self, odom_frame: str, output_dir: str): self.output_dir = output_dir def bounding_box_detection_feedback( - self, annotated_img, centroid_x, centroid_y, semantic_class + self, annotated_imgs, detection_index, centroid_x, centroid_y, semantic_class ): bridge = CvBridge() - if centroid_x is not None and centroid_y is not None: + # Annotate only the detection image + if detection_index is not None and centroid_x is not None and centroid_y is not None: + annotated_img = annotated_imgs[detection_index] label = f"{semantic_class}" cv2.putText( annotated_img, @@ -100,8 +108,6 @@ def bounding_box_detection_feedback( (0, 0, 225), 20, ) - - # Label the centroid cv2.drawMarker( annotated_img, (centroid_x, centroid_y), @@ -111,8 +117,12 @@ def bounding_box_detection_feedback( thickness=30, ) - annotated_img_msg = bridge.cv2_to_imgmsg(annotated_img, encoding="passthrough") - self.annotated_img_pub.publish(annotated_img_msg) + request_msg = ManipulationApprovalRequest() + request_msg.images = [bridge.cv2_to_imgmsg(img, encoding="passthrough") for img in annotated_imgs] + request_msg.detection_image_index = detection_index if detection_index is not None else 0 + request_msg.image_x = centroid_x if centroid_x is not None else 0 + request_msg.image_y = centroid_y if centroid_y is not None else 0 + self.annotated_img_pub.publish(request_msg) self.pick_confirmation_event.clear() @@ -126,10 +136,13 @@ def bounding_box_detection_feedback( if self.break_out_of_waiting_loop: self.logger.info("ROBOT WAS PREEMPTED") - self.pick_confirmation_response = False + self.pick_confirmation_approved = False + else: + self.logger.info(f"Pick Confirmation Response Received: approved ({self.pick_confirmation_approved}), xy ({self.pick_confirmation_xy}), image_index ({self.pick_confirmation_image_index})") - # This boolean determines whether the executor keeps going - return self.pick_confirmation_response + return (self.pick_confirmation_approved, + self.pick_confirmation_xy, + self.pick_confirmation_image_index) def pick_image_feedback(self, semantic_image, mask_image): bridge = CvBridge() @@ -218,13 +231,20 @@ def register_publishers(self, node): ) self.annotated_img_pub = node.create_publisher( - Image, "~/annotated_image", qos_profile=latching_qos + ManipulationApprovalRequest, "~/annotated_image", qos_profile=latching_qos ) self.lease_takeover_publisher = node.create_publisher(String, "~/takeover", 10) + #node.create_subscription( + # Bool, + # "~/pick_confirmation", + # self.pick_confirmation_callback, + # 10, + #) + node.create_subscription( - Bool, + ManipulationApprovalResponse, "~/pick_confirmation", self.pick_confirmation_callback, 10, @@ -259,13 +279,26 @@ def set_robot_holding_state(self, is_holding: bool, object_id: str, timeout=5): return future.result().success def pick_confirmation_callback(self, msg): - if msg.data: - self.logger.info("Detection is valid. Continuing pick action!") - self.pick_confirmation_response = True - else: + #if msg.data: + # self.logger.info("Detection is valid. Continuing pick action!") + # self.pick_confirmation_response = True + #else: + # self.logger.warn("Detection is invalid. Discontinuing pick action.") + # self.pick_confirmation_response = False + + #self.pick_confirmation_event.set() + + # If not approved, discontinue + # If approved, check whether the detection is overwritten + if not msg.approve: self.logger.warn("Detection is invalid. Discontinuing pick action.") - self.pick_confirmation_response = False - + self.pick_confirmation_approved = False + else: + self.pick_confirmation_approved = True + self.pick_confirmation_image_index = msg.image_index + self.pick_confirmation_xy[0] = msg.image_x + self.pick_confirmation_xy[1] = msg.image_y + self.logger.warn("Detection is valid. Continuing pick action!") self.pick_confirmation_event.set() def log_lease_takeover(self, event: str): From 4154c798d13324bdfe59409d6bf9f16943b1bc3c Mon Sep 17 00:00:00 2001 From: Jake Arkin Date: Mon, 30 Mar 2026 17:05:36 -0400 Subject: [PATCH 3/8] Remove annotation from image before publishing; Request approval even when no detections are found --- spot_tools/src/spot_skills/grasp_utils.py | 30 +++++++++++++++---- .../src/spot_tools_ros/spot_executor_ros.py | 23 +------------- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/spot_tools/src/spot_skills/grasp_utils.py b/spot_tools/src/spot_skills/grasp_utils.py index cc13f4f..569c679 100644 --- a/spot_tools/src/spot_skills/grasp_utils.py +++ b/spot_tools/src/spot_skills/grasp_utils.py @@ -211,11 +211,11 @@ def object_grasp( detection_index = 0 print("Found object centroid:", xy) - if detection_index is None or candidates is None: + if candidates is None: if feedback is not None: feedback.print( "INFO", - "Failed to find an object in any cameras after 2 attempts. Please check the detector or user input.", + "Failed to capture any camera images. Please check the detector or user input.", ) execute_recovery_action( spot, @@ -227,17 +227,23 @@ def object_grasp( time.sleep(1) return False - image, _, xy = candidates[detection_index] - # Display all candidate images in the approval panel if feedback is not None: annotated_imgs = [copy(img) for (_, img, _) in candidates] + if detection_index is not None: + _, _, xy = candidates[detection_index] + det_x = xy[0] if xy else None + det_y = xy[1] if xy else None + else: + det_x = None + det_y = None + approved, updated_xy, selected_index = feedback.bounding_box_detection_feedback( annotated_imgs, detection_index, - xy[0] if xy else None, - xy[1] if xy else None, + det_x, + det_y, semantic_class, ) @@ -248,6 +254,18 @@ def object_grasp( # Use selected camera image and pixel (panel always sends the correct selection) image, _, _ = candidates[selected_index] xy = updated_xy + else: + if detection_index is None: + execute_recovery_action( + spot, + recover_arm=False, + relative_pose=math_helpers.SE2Pose( + x=0.0, y=0.0, angle=np.random.choice([-0.5, 0.5]) + ), + ) + time.sleep(1) + return False + image, _, xy = candidates[detection_index] pick_vec = geometry_pb2.Vec2(x=xy[0], y=xy[1]) stow_arm(spot) diff --git a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py index 5981ea9..2784b49 100755 --- a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py +++ b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py @@ -95,30 +95,9 @@ def bounding_box_detection_feedback( ): bridge = CvBridge() - # Annotate only the detection image - if detection_index is not None and centroid_x is not None and centroid_y is not None: - annotated_img = annotated_imgs[detection_index] - label = f"{semantic_class}" - cv2.putText( - annotated_img, - label, - (centroid_x - 100, centroid_y - 200), - cv2.FONT_HERSHEY_SIMPLEX, - 5, - (0, 0, 225), - 20, - ) - cv2.drawMarker( - annotated_img, - (centroid_x, centroid_y), - (0, 0, 255), - markerType=cv2.MARKER_TILTED_CROSS, - markerSize=200, - thickness=30, - ) - request_msg = ManipulationApprovalRequest() request_msg.images = [bridge.cv2_to_imgmsg(img, encoding="passthrough") for img in annotated_imgs] + request_msg.has_detection = detection_index is not None request_msg.detection_image_index = detection_index if detection_index is not None else 0 request_msg.image_x = centroid_x if centroid_x is not None else 0 request_msg.image_y = centroid_y if centroid_y is not None else 0 From 7f7c940d464a86cf17e78d7bc7b128551f264b74 Mon Sep 17 00:00:00 2001 From: Jake Arkin Date: Tue, 31 Mar 2026 17:44:59 -0400 Subject: [PATCH 4/8] Only check secondary image sources if no primary detection found --- spot_tools/src/spot_skills/detection_utils.py | 54 +++++++++++++------ spot_tools/src/spot_skills/grasp_utils.py | 15 +++--- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/spot_tools/src/spot_skills/detection_utils.py b/spot_tools/src/spot_skills/detection_utils.py index be19eb3..29b52bb 100644 --- a/spot_tools/src/spot_skills/detection_utils.py +++ b/spot_tools/src/spot_skills/detection_utils.py @@ -1,10 +1,20 @@ from copy import copy +from dataclasses import dataclass +from typing import Optional import cv2 import numpy as np +from bosdyn.api import image_pb2 from ultralytics import YOLOE +@dataclass +class DetectionCandidate: + """A candidate image from a single camera source for object detection.""" + bosdyn_image: image_pb2.ImageResponse + cv2_image: np.ndarray + detection_xy: Optional[tuple[int, int]] + class Detector: def __init__(self, spot): self.spot = spot @@ -39,25 +49,26 @@ def set_up_detector(self, semantic_class): print(f"Updated recognized classes: {updated_classes}") def return_centroid(self, img_source, semantic_class, debug): - # Primary image - primary_image, primary_img = self.spot.get_image_RGB(view=img_source) - primary_xy = self._get_centroid(primary_img, semantic_class, rotate=0, debug=debug) - - # Always scan all other cameras - other_candidates, other_detection_index = self._look_for_object(semantic_class, debug=debug) + # List of DetectionCandidates + detection_candidates = [] # List of DetectionCandidates -- one per camera source on Spot + detection_index = None - # Combine: primary first - candidates = [(primary_image, primary_img, primary_xy)] + other_candidates + # Get the primary image source & run the detector; Add to detection_candidates + primary_bosdyn_image, primary_cv2_image = self.spot.get_image_RGB(view=img_source) + primary_xy = self._get_centroid(primary_cv2_image, semantic_class, rotate=0, debug=debug) + detection_candidates.append(DetectionCandidate(primary_bosdyn_image, primary_cv2_image, primary_xy)) + # If primary image has a detection, just get the images from the other image sources; Otherwise, look for detections if primary_xy is not None: - detection_index = 0 - elif other_detection_index is not None: - detection_index = other_detection_index + 1 # offset for prepended primary + detection_index = 0 + secondary_candidates = self._get_candidate_detection_images() else: - detection_index = None - print("Object not found in any camera") - - return detection_index, candidates + secondary_candidates, secondary_detection_index = self._look_for_object(semantic_class, debug=debug) + if secondary_detection_index is not None: + detection_index = secondary_detection_index + 1 # offset of the primary candidate + # Collect all of the detection candidates + detection_candidates.extend(secondary_candidates) + return detection_index, detection_candidates def _get_centroid(self, img, semantic_class, rotate, debug): if rotate == 0: @@ -121,10 +132,19 @@ def _get_centroid(self, img, semantic_class, rotate, debug): else: return None + def _get_candidate_detection_images(self): + detection_candidates = [] + sources = self.spot.image_client.list_image_sources() + sources = [s for s in sources if not "depth" in s.name and "hand_image" != s.name] + for source in sources: + bosdyn_image, cv2_image = self.spot.get_image_RGB(view=source.name) + detection_candidates.append(DetectionCandidate(bosdyn_image, cv2_image, None)) + return detection_candidates + def _look_for_object(self, semantic_class, debug): sources = self.spot.image_client.list_image_sources() - candidates = [] # list of (bosdyn_image, cv2_img, xy_or_None) + candidates = [] detection_index = None for source in sources: @@ -153,7 +173,7 @@ def _look_for_object(self, semantic_class, debug): if xy is None: print(f"Object not found in {image_source}.") - candidates.append((image, img, xy)) + candidates.append(DetectionCandidate(image, img, xy)) if xy is not None and detection_index is None: detection_index = len(candidates) - 1 diff --git a/spot_tools/src/spot_skills/grasp_utils.py b/spot_tools/src/spot_skills/grasp_utils.py index 569c679..ba26a60 100644 --- a/spot_tools/src/spot_skills/grasp_utils.py +++ b/spot_tools/src/spot_skills/grasp_utils.py @@ -30,7 +30,7 @@ open_gripper, stow_arm, ) -from spot_skills.detection_utils import Detector +from spot_skills.detection_utils import DetectionCandidate, Detector from spot_skills.primitives import execute_recovery_action g_image_click = None @@ -207,7 +207,7 @@ def object_grasp( else: image, img = spot.get_image_RGB(view=image_source) xy = get_user_grasp_input(spot, img) - candidates = [(image, img, xy)] + candidates = [DetectionCandidate(image, img, xy)] detection_index = 0 print("Found object centroid:", xy) @@ -229,10 +229,10 @@ def object_grasp( # Display all candidate images in the approval panel if feedback is not None: - annotated_imgs = [copy(img) for (_, img, _) in candidates] + cv2_images = [copy(c.cv2_image) for c in candidates] if detection_index is not None: - _, _, xy = candidates[detection_index] + xy = candidates[detection_index].detection_xy det_x = xy[0] if xy else None det_y = xy[1] if xy else None else: @@ -240,7 +240,7 @@ def object_grasp( det_y = None approved, updated_xy, selected_index = feedback.bounding_box_detection_feedback( - annotated_imgs, + cv2_images, detection_index, det_x, det_y, @@ -252,7 +252,7 @@ def object_grasp( return False # Use selected camera image and pixel (panel always sends the correct selection) - image, _, _ = candidates[selected_index] + image = candidates[selected_index].bosdyn_image xy = updated_xy else: if detection_index is None: @@ -265,7 +265,8 @@ def object_grasp( ) time.sleep(1) return False - image, _, xy = candidates[detection_index] + image = candidates[detection_index].bosdyn_image + xy = candidates[detection_index].detection_xy pick_vec = geometry_pb2.Vec2(x=xy[0], y=xy[1]) stow_arm(spot) From f7e639d9dc63b675b9a8fd8467c3305b33a1eb7c Mon Sep 17 00:00:00 2001 From: Jake Arkin Date: Tue, 31 Mar 2026 17:44:59 -0400 Subject: [PATCH 5/8] Only check secondary image sources if no primary detection found --- spot_tools/src/spot_skills/detection_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/spot_tools/src/spot_skills/detection_utils.py b/spot_tools/src/spot_skills/detection_utils.py index 29b52bb..2c44f58 100644 --- a/spot_tools/src/spot_skills/detection_utils.py +++ b/spot_tools/src/spot_skills/detection_utils.py @@ -49,7 +49,6 @@ def set_up_detector(self, semantic_class): print(f"Updated recognized classes: {updated_classes}") def return_centroid(self, img_source, semantic_class, debug): - # List of DetectionCandidates detection_candidates = [] # List of DetectionCandidates -- one per camera source on Spot detection_index = None From c6a0530f1431abe8929c035e71c5bb71b06e56c9 Mon Sep 17 00:00:00 2001 From: Jake Arkin Date: Tue, 31 Mar 2026 17:59:47 -0400 Subject: [PATCH 6/8] Update topic name for manipulation approval --- spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py index 32f8466..c2fe588 100755 --- a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py +++ b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py @@ -91,17 +91,17 @@ def __init__(self, odom_frame: str, output_dir: str): self.output_dir = output_dir def bounding_box_detection_feedback( - self, annotated_imgs, detection_index, centroid_x, centroid_y, semantic_class + self, detection_imgs, detection_index, centroid_x, centroid_y, semantic_class ): bridge = CvBridge() request_msg = ManipulationApprovalRequest() - request_msg.images = [bridge.cv2_to_imgmsg(img, encoding="passthrough") for img in annotated_imgs] + request_msg.images = [bridge.cv2_to_imgmsg(img, encoding="passthrough") for img in detection_imgs] request_msg.has_detection = detection_index is not None request_msg.detection_image_index = detection_index if detection_index is not None else 0 request_msg.image_x = centroid_x if centroid_x is not None else 0 request_msg.image_y = centroid_y if centroid_y is not None else 0 - self.annotated_img_pub.publish(request_msg) + self.detection_img_pub.publish(request_msg) self.pick_confirmation_event.clear() @@ -209,8 +209,8 @@ def register_publishers(self, node): MarkerArray, "~/mlp_target_publisher", qos_profile=latching_qos ) - self.annotated_img_pub = node.create_publisher( - ManipulationApprovalRequest, "~/annotated_image", qos_profile=latching_qos + self.detection_img_pub = node.create_publisher( + ManipulationApprovalRequest, "~/manipulation_request", qos_profile=latching_qos ) self.lease_takeover_publisher = node.create_publisher(String, "~/takeover", 10) From b55efd3b2f7e14d9ca7a0ced386aa42a6bdaa6e1 Mon Sep 17 00:00:00 2001 From: Jake Arkin Date: Wed, 1 Apr 2026 10:50:42 -0400 Subject: [PATCH 7/8] Apply formatting --- spot_tools/src/spot_skills/detection_utils.py | 48 ++++++++++------ .../src/spot_tools_ros/spot_executor_ros.py | 57 +++++++++++-------- 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/spot_tools/src/spot_skills/detection_utils.py b/spot_tools/src/spot_skills/detection_utils.py index 2c44f58..5e84744 100644 --- a/spot_tools/src/spot_skills/detection_utils.py +++ b/spot_tools/src/spot_skills/detection_utils.py @@ -11,10 +11,12 @@ @dataclass class DetectionCandidate: """A candidate image from a single camera source for object detection.""" + bosdyn_image: image_pb2.ImageResponse cv2_image: np.ndarray detection_xy: Optional[tuple[int, int]] + class Detector: def __init__(self, spot): self.spot = spot @@ -49,22 +51,32 @@ def set_up_detector(self, semantic_class): print(f"Updated recognized classes: {updated_classes}") def return_centroid(self, img_source, semantic_class, debug): - detection_candidates = [] # List of DetectionCandidates -- one per camera source on Spot + detection_candidates = [] # List of DetectionCandidates -- one per camera source on Spot detection_index = None # Get the primary image source & run the detector; Add to detection_candidates - primary_bosdyn_image, primary_cv2_image = self.spot.get_image_RGB(view=img_source) - primary_xy = self._get_centroid(primary_cv2_image, semantic_class, rotate=0, debug=debug) - detection_candidates.append(DetectionCandidate(primary_bosdyn_image, primary_cv2_image, primary_xy)) + primary_bosdyn_image, primary_cv2_image = self.spot.get_image_RGB( + view=img_source + ) + primary_xy = self._get_centroid( + primary_cv2_image, semantic_class, rotate=0, debug=debug + ) + detection_candidates.append( + DetectionCandidate(primary_bosdyn_image, primary_cv2_image, primary_xy) + ) # If primary image has a detection, just get the images from the other image sources; Otherwise, look for detections if primary_xy is not None: - detection_index = 0 - secondary_candidates = self._get_candidate_detection_images() + detection_index = 0 + secondary_candidates = self._get_candidate_detection_images() else: - secondary_candidates, secondary_detection_index = self._look_for_object(semantic_class, debug=debug) - if secondary_detection_index is not None: - detection_index = secondary_detection_index + 1 # offset of the primary candidate + secondary_candidates, secondary_detection_index = self._look_for_object( + semantic_class, debug=debug + ) + if secondary_detection_index is not None: + detection_index = ( + secondary_detection_index + 1 + ) # offset of the primary candidate # Collect all of the detection candidates detection_candidates.extend(secondary_candidates) return detection_index, detection_candidates @@ -132,13 +144,17 @@ def _get_centroid(self, img, semantic_class, rotate, debug): return None def _get_candidate_detection_images(self): - detection_candidates = [] - sources = self.spot.image_client.list_image_sources() - sources = [s for s in sources if not "depth" in s.name and "hand_image" != s.name] - for source in sources: - bosdyn_image, cv2_image = self.spot.get_image_RGB(view=source.name) - detection_candidates.append(DetectionCandidate(bosdyn_image, cv2_image, None)) - return detection_candidates + detection_candidates = [] + sources = self.spot.image_client.list_image_sources() + sources = [ + s for s in sources if "depth" not in s.name and "hand_image" != s.name + ] + for source in sources: + bosdyn_image, cv2_image = self.spot.get_image_RGB(view=source.name) + detection_candidates.append( + DetectionCandidate(bosdyn_image, cv2_image, None) + ) + return detection_candidates def _look_for_object(self, semantic_class, debug): sources = self.spot.image_client.list_image_sources() diff --git a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py index c2fe588..de4ee8c 100755 --- a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py +++ b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py @@ -2,7 +2,6 @@ import threading import time -import cv2 import numpy as np import rclpy import rclpy.time @@ -12,6 +11,10 @@ from cv_bridge import CvBridge from heracles_ros_interfaces.srv import UpdateHoldingState from nav_msgs.msg import Path +from nlu_interface_rviz.msg import ( + ManipulationApprovalRequest, + ManipulationApprovalResponse, +) from rclpy.callback_groups import MutuallyExclusiveCallbackGroup from rclpy.executors import MultiThreadedExecutor from rclpy.node import Node @@ -24,7 +27,7 @@ from spot_executor.fake_spot import FakeSpot from spot_executor.spot import Spot from spot_skills.detection_utils import YOLODetector -from std_msgs.msg import Bool, String +from std_msgs.msg import String from visualization_msgs.msg import Marker, MarkerArray from robot_executor_interface.mid_level_planner import ( @@ -36,8 +39,6 @@ from spot_tools_ros.occupancy_grid_ros_updater import OccupancyGridROSUpdater from spot_tools_ros.utils import get_tf_pose, waypoints_to_path -from nlu_interface_rviz.msg import ManipulationApprovalRequest, ManipulationApprovalResponse - def load_inverse_semantic_id_map_from_label_space(fn): with open(fn, "r") as fo: @@ -79,10 +80,10 @@ def build_markers(pts, namespaces, frames, colors): class RosFeedbackCollector: def __init__(self, odom_frame: str, output_dir: str): self.pick_confirmation_event = threading.Event() - #self.pick_confirmation_response = False + # self.pick_confirmation_response = False self.pick_confirmation_approved = False - self.pick_confirmation_xy = [0,0] + self.pick_confirmation_xy = [0, 0] self.pick_confirmation_image_index = 0 self.break_out_of_waiting_loop = False @@ -96,9 +97,13 @@ def bounding_box_detection_feedback( bridge = CvBridge() request_msg = ManipulationApprovalRequest() - request_msg.images = [bridge.cv2_to_imgmsg(img, encoding="passthrough") for img in detection_imgs] + request_msg.images = [ + bridge.cv2_to_imgmsg(img, encoding="passthrough") for img in detection_imgs + ] request_msg.has_detection = detection_index is not None - request_msg.detection_image_index = detection_index if detection_index is not None else 0 + request_msg.detection_image_index = ( + detection_index if detection_index is not None else 0 + ) request_msg.image_x = centroid_x if centroid_x is not None else 0 request_msg.image_y = centroid_y if centroid_y is not None else 0 self.detection_img_pub.publish(request_msg) @@ -117,11 +122,15 @@ def bounding_box_detection_feedback( self.logger.info("ROBOT WAS PREEMPTED") self.pick_confirmation_approved = False else: - self.logger.info(f"Pick Confirmation Response Received: approved ({self.pick_confirmation_approved}), xy ({self.pick_confirmation_xy}), image_index ({self.pick_confirmation_image_index})") + self.logger.info( + f"Pick Confirmation Response Received: approved ({self.pick_confirmation_approved}), xy ({self.pick_confirmation_xy}), image_index ({self.pick_confirmation_image_index})" + ) - return (self.pick_confirmation_approved, - self.pick_confirmation_xy, - self.pick_confirmation_image_index) + return ( + self.pick_confirmation_approved, + self.pick_confirmation_xy, + self.pick_confirmation_image_index, + ) def pick_image_feedback(self, semantic_image, mask_image): bridge = CvBridge() @@ -210,17 +219,19 @@ def register_publishers(self, node): ) self.detection_img_pub = node.create_publisher( - ManipulationApprovalRequest, "~/manipulation_request", qos_profile=latching_qos + ManipulationApprovalRequest, + "~/manipulation_request", + qos_profile=latching_qos, ) self.lease_takeover_publisher = node.create_publisher(String, "~/takeover", 10) - #node.create_subscription( + # node.create_subscription( # Bool, # "~/pick_confirmation", # self.pick_confirmation_callback, # 10, - #) + # ) node.create_subscription( ManipulationApprovalResponse, @@ -258,14 +269,14 @@ def set_robot_holding_state(self, is_holding: bool, object_id: str, timeout=5): return future.result().success def pick_confirmation_callback(self, msg): - #if msg.data: + # if msg.data: # self.logger.info("Detection is valid. Continuing pick action!") # self.pick_confirmation_response = True - #else: + # else: # self.logger.warn("Detection is invalid. Discontinuing pick action.") # self.pick_confirmation_response = False - #self.pick_confirmation_event.set() + # self.pick_confirmation_event.set() # If not approved, discontinue # If approved, check whether the detection is overwritten @@ -273,11 +284,11 @@ def pick_confirmation_callback(self, msg): self.logger.warn("Detection is invalid. Discontinuing pick action.") self.pick_confirmation_approved = False else: - self.pick_confirmation_approved = True - self.pick_confirmation_image_index = msg.image_index - self.pick_confirmation_xy[0] = msg.image_x - self.pick_confirmation_xy[1] = msg.image_y - self.logger.warn("Detection is valid. Continuing pick action!") + self.pick_confirmation_approved = True + self.pick_confirmation_image_index = msg.image_index + self.pick_confirmation_xy[0] = msg.image_x + self.pick_confirmation_xy[1] = msg.image_y + self.logger.warn("Detection is valid. Continuing pick action!") self.pick_confirmation_event.set() def log_lease_takeover(self, event: str): From e480aaff5277122df091beef90770ab170b2183c Mon Sep 17 00:00:00 2001 From: Jake Arkin Date: Thu, 2 Apr 2026 11:53:25 -0400 Subject: [PATCH 8/8] Remove code comment block --- spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py index de4ee8c..36df3d8 100755 --- a/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py +++ b/spot_tools_ros/src/spot_tools_ros/spot_executor_ros.py @@ -226,13 +226,6 @@ def register_publishers(self, node): self.lease_takeover_publisher = node.create_publisher(String, "~/takeover", 10) - # node.create_subscription( - # Bool, - # "~/pick_confirmation", - # self.pick_confirmation_callback, - # 10, - # ) - node.create_subscription( ManipulationApprovalResponse, "~/pick_confirmation",