Use this file to discover all available pages before exploring further.
RF-DETR supports instance segmentation with the same consistent API as its detection models. The segmentation models are trained on Microsoft COCO and produce pixel-level instance masks alongside bounding boxes and class labels.
import supervision as sv
from rfdetr import RFDETRSegMedium
from rfdetr.assets.coco_classes import COCO_CLASSES

# Single-image instance segmentation: run the model on one image and draw
# the predicted masks and class labels on it.
model = RFDETRSegMedium()

detections = model.predict("https://media.roboflow.com/dog.jpg", threshold=0.5)

# One human-readable label per detection, looked up by COCO class id.
labels = [COCO_CLASSES[class_id] for class_id in detections.class_id]

# Annotate a copy so the image kept in `detections.data` is left unmodified
# (supervision annotators draw onto the scene they are handed).
scene = detections.data["source_image"].copy()

annotated_image = sv.MaskAnnotator().annotate(scene, detections)
annotated_image = sv.LabelAnnotator().annotate(annotated_image, detections, labels)
Use sv.MaskAnnotator() to render instance masks. For detections without masks (e.g., when comparing with detection models), use sv.BoxAnnotator() instead.
import cv2
import supervision as sv
from rfdetr import RFDETRSegMedium
from rfdetr.assets.coco_classes import COCO_CLASSES

# Instance segmentation on a video file, displayed frame by frame.
# Press "q" in the preview window to stop early.

# Single place to set the input; replace the placeholder with a real path.
SOURCE_VIDEO_PATH = "<SOURCE_VIDEO_PATH>"

model = RFDETRSegMedium()

# Annotators do not depend on the frame, so build them once up front.
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture(SOURCE_VIDEO_PATH)
if not video_capture.isOpened():
    raise RuntimeError(f"Failed to open video source: {SOURCE_VIDEO_PATH}")

try:
    while True:
        success, frame_bgr = video_capture.read()
        if not success:
            # No frame could be read (e.g. end of the video).
            break

        # OpenCV delivers BGR frames; the model is given an RGB copy.
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        detections = model.predict(frame_rgb, threshold=0.5)

        labels = [COCO_CLASSES[class_id] for class_id in detections.class_id]

        # Draw on the BGR frame because that is what cv2.imshow displays.
        annotated_frame = mask_annotator.annotate(frame_bgr, detections)
        annotated_frame = label_annotator.annotate(annotated_frame, detections, labels)

        cv2.imshow("RF-DETR-Seg Video", annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture handle and close the window, even if
    # inference or annotation raised part-way through the video.
    video_capture.release()
    cv2.destroyAllWindows()
Replace <SOURCE_VIDEO_PATH> with your video file path.
import cv2
import supervision as sv
from rfdetr import RFDETRSegMedium
from rfdetr.assets.coco_classes import COCO_CLASSES

# Live instance segmentation from a webcam.
# Press "q" in the preview window to quit.

WEBCAM_INDEX = 0  # Change this to the desired webcam index (e.g., 1, 2, ...)

model = RFDETRSegMedium()

# Annotators do not depend on the frame, so build them once up front.
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture(WEBCAM_INDEX)
if not video_capture.isOpened():
    raise RuntimeError(f"Failed to open webcam: {WEBCAM_INDEX}")

try:
    while True:
        success, frame_bgr = video_capture.read()
        if not success:
            # The camera stopped delivering frames.
            break

        # OpenCV delivers BGR frames; the model is given an RGB copy.
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        detections = model.predict(frame_rgb, threshold=0.5)

        labels = [COCO_CLASSES[class_id] for class_id in detections.class_id]

        # Draw on the BGR frame because that is what cv2.imshow displays.
        annotated_frame = mask_annotator.annotate(frame_bgr, detections)
        annotated_frame = label_annotator.annotate(annotated_frame, detections, labels)

        cv2.imshow("RF-DETR-Seg Webcam", annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always hand the camera back and close the window, even if
    # inference or annotation raised while streaming.
    video_capture.release()
    cv2.destroyAllWindows()
WEBCAM_INDEX is usually 0 for the default camera. Press q to quit.
import cv2
import supervision as sv
from rfdetr import RFDETRSegMedium
from rfdetr.assets.coco_classes import COCO_CLASSES

# Live instance segmentation from an RTSP stream.
# Press "q" in the preview window to quit.

# Single place to set the input; replace the placeholder with a real URL.
RTSP_STREAM_URL = "<RTSP_STREAM_URL>"

model = RFDETRSegMedium()

# Annotators do not depend on the frame, so build them once up front.
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture(RTSP_STREAM_URL)
if not video_capture.isOpened():
    # NOTE: the URL may embed credentials (rtsp://user:pass@host/...);
    # redact it before sharing logs that contain this message.
    raise RuntimeError(f"Failed to open RTSP stream: {RTSP_STREAM_URL}")

try:
    while True:
        success, frame_bgr = video_capture.read()
        if not success:
            # The stream stopped delivering frames.
            break

        # OpenCV delivers BGR frames; the model is given an RGB copy.
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        detections = model.predict(frame_rgb, threshold=0.5)

        labels = [COCO_CLASSES[class_id] for class_id in detections.class_id]

        # Draw on the BGR frame because that is what cv2.imshow displays.
        annotated_frame = mask_annotator.annotate(frame_bgr, detections)
        annotated_frame = label_annotator.annotate(annotated_frame, detections, labels)

        cv2.imshow("RF-DETR-Seg RTSP", annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always close the stream connection and the window, even if
    # inference or annotation raised while streaming.
    video_capture.release()
    cv2.destroyAllWindows()
Replace <RTSP_STREAM_URL> with your stream URL (e.g., rtsp://user:pass@192.168.1.1/stream).
Pass a list of images to predict() to process multiple images in a single forward pass. The method returns a list of supervision.Detections objects, each containing bounding boxes, class IDs, confidence scores, and instance masks.
import io

import requests
import supervision as sv
from PIL import Image
from rfdetr import RFDETRSegMedium
from rfdetr.assets.coco_classes import COCO_CLASSES

# Batch instance segmentation: download several images, run them through
# the model in one predict() call, then plot each annotated result.

# Upper bound for each download so a stalled server cannot hang the script.
REQUEST_TIMEOUT_SECONDS = 30


def _load_image(url):
    """Download *url* and return it as a PIL image.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never handed to the image decoder.
        requests.Timeout: if the server does not respond in time.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))


model = RFDETRSegMedium()

urls = [
    "https://media.roboflow.com/notebooks/examples/dog-2.jpeg",
    "https://media.roboflow.com/notebooks/examples/dog-3.jpeg",
]

images = [_load_image(url) for url in urls]

# Passing a list yields one detections object per input image, in order.
detections_list = model.predict(images, threshold=0.5)

# Annotators do not depend on the image, so build them once up front.
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator()

for image, detections in zip(images, detections_list):
    # Label text is "<class name> <confidence>", e.g. "dog 0.93".
    labels = [
        f"{COCO_CLASSES[class_id]} {confidence:.2f}"
        for class_id, confidence in zip(detections.class_id, detections.confidence)
    ]

    # Annotate a copy so the downloaded image itself stays untouched.
    annotated_image = image.copy()
    annotated_image = mask_annotator.annotate(annotated_image, detections)
    annotated_image = label_annotator.annotate(annotated_image, detections, labels)

    sv.plot_image(annotated_image)