{"library":"rf-groundingdino","title":"Grounding DINO (Roboflow Fork)","description":"A Roboflow-maintained fork of Grounding DINO, a state-of-the-art open-set object detector that can detect arbitrary objects based on text prompts. The model combines image and text encoders with a transformer-based fusion approach. This entry covers version 0.3.0. The release cadence is irregular, but the project is actively maintained.","language":"python","status":"active","last_verified":"Sat May 09","install":{"commands":["pip install rf-groundingdino"],"cli":null},"imports":["from rf_groundingdino import groundingdino","from rf_groundingdino.util.inference import load_model","from rf_groundingdino.util.inference import predict"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"from rf_groundingdino import groundingdino\nfrom rf_groundingdino.util.inference import load_model, predict\nimport torch\n\ndevice = 'cuda' if torch.cuda.is_available() else 'cpu'\nmodel = load_model('groundingdino_swint_ogc', 'path/to/config.py', 'path/to/weights.pth')\n\n# For inference, use predict() or model()\nimport cv2\nimage = cv2.imread('image.jpg')\nprompts = 'cat . dog .'  # separate classes with ' . '\nboxes, logits, phrases = predict(model, image, prompts, box_threshold=0.3, text_threshold=0.25)\nprint('Detections:', len(phrases))","lang":"python","description":"Load the model and run inference on an image with text prompts.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}