"""Modal deployment of the SurgiSight YOLO segmentation detector.

Defines a GPU-backed Modal class that downloads the model weights from the
Hugging Face Hub when a container starts and exposes a remote ``run`` method
that annotates an encoded image and reports the detected classes.
"""

import os

import modal

app = modal.App("surgisight")

# Hugging Face location of the trained segmentation weights.
MODEL_REPO_ID = "sugan04/cholec-yolo26n-seg"
MODEL_FILENAME = "best.pt"

image = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install("libgl1", "libglib2.0-0")
    .pip_install(
        "ultralytics",
        "pillow",
        "numpy",
        "opencv-python-headless",
        "huggingface_hub",
    )
)

# These packages are installed in the container image (see ``pip_install``
# above) and may be missing locally, so they are imported under
# ``image.imports()``. NOTE: this does not cache the model weights; they are
# fetched in ``SurgiSightDetector.load_model`` when a container starts.
with image.imports():
    from ultralytics import YOLO
    from PIL import Image as PILImage
    import numpy as np
    import cv2
    import io


@app.cls(gpu="T4", image=image, secrets=[modal.Secret.from_name("hf-secret")])
class SurgiSightDetector:
    """Remote YOLO segmentation detector running on a T4 GPU container."""

    @modal.enter()
    def load_model(self):
        """Download the weights and build the model once per container start."""
        from huggingface_hub import hf_hub_download

        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            # Read from the environment; presumably populated by the
            # "hf-secret" Modal secret. ``None`` is passed if it is unset.
            token=os.environ.get("HF_TOKEN"),
        )
        self.model = YOLO(model_path)

    @modal.method()
    def run(self, image_bytes: bytes, conf_threshold: float = 0.25):
        """Run segmentation on an encoded image and return the annotated result.

        Args:
            image_bytes: Encoded image file contents (any format that
                ``cv2.imdecode`` can read).
            conf_threshold: Confidence threshold passed to the model as
                ``conf``.

        Returns:
            A dict with two keys:
            ``"annotated_bytes"``: the PNG-encoded annotated frame, and
            ``"detections"``: a list of ``{"cls_id": int, "conf": float}``
            dicts, one per detected box (empty when nothing is detected).

        Raises:
            ValueError: If ``image_bytes`` is empty or cannot be decoded.
            RuntimeError: If the annotated frame cannot be PNG-encoded.
        """
        if not image_bytes:
            raise ValueError("image_bytes is empty")

        encoded = np.frombuffer(image_bytes, np.uint8)
        frame = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
        # ``imdecode`` signals failure by returning None instead of raising.
        # Passing None on to the model must be avoided: it would not be run
        # on the caller's image at all.
        if frame is None:
            raise ValueError("image_bytes could not be decoded as an image")

        results = self.model(frame, task="segment", conf=conf_threshold)
        result = results[0]
        annotated = result.plot()

        # Encode the annotated image back to bytes.
        ok, buffer = cv2.imencode(".png", annotated)
        if not ok:
            raise RuntimeError("failed to encode annotated image as PNG")
        annotated_bytes = buffer.tobytes()

        # Extract the class id and confidence of every detected box.
        boxes = result.boxes
        detections = (
            []
            if boxes is None
            else [
                {"cls_id": int(box.cls[0]), "conf": float(box.conf[0])}
                for box in boxes
            ]
        )

        return {"annotated_bytes": annotated_bytes, "detections": detections}


# For local testing
@app.local_entrypoint()
def main():
    """Instantiate the detector handle locally as a smoke test."""
    # Only the handle is created here; no remote method is invoked.
    SurgiSightDetector()
    print("Modal SurgiSight detector ready.")