# openai-structured-output

# code_execution · unverified · null · json · download .py

# Model to use (gpt-4o or gpt-4o-mini recommended for structured output)

import sys
import os
import subprocess
import time
import urllib.request
import json

# ─────────────────────────────────────────
# PRE_EXECUTION
# ─────────────────────────────────────────

# Fetch the registry entry for "openai". Two attempts are made with a
# 2-second pause in between; if the second attempt also fails the script
# aborts with exit status 1.
registry = None
for attempt in range(2):
    try:
        req = urllib.request.Request(
            "https://checklist.day/api/registry/openai",
            headers={"User-Agent": "checklist-agent/1.0"},
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            registry = json.loads(resp.read())
        break
    except Exception as e:
        # Broad on purpose: connection errors, HTTP errors and malformed JSON
        # are all reported the same way, as "registry unreachable".
        if attempt == 1:
            print(f"ABORT: registry unreachable — {e}")
            sys.exit(1)
        time.sleep(2)

# The body can be valid JSON without being an object (null, a list, a bare
# string). Calling .get() on that would die with an AttributeError traceback,
# so stop here with a readable message and the same non-zero exit status.
if not isinstance(registry, dict):
    print(f"ABORT: registry returned unexpected payload of type {type(registry).__name__}")
    sys.exit(1)

# "warnings" may be a list or a single value; a single value is printed as a
# one-item list.
warnings = registry.get("warnings", [])
if warnings:
    print("[openai] WARNINGS:")
    for w in warnings if isinstance(warnings, list) else [warnings]:
        print(f"  ⚠ {w}")

# ─────────────────────────────────────────
# EXECUTION
# ─────────────────────────────────────────

# Install the OpenAI SDK (version 1.0.0 or newer) into the interpreter that is
# running this script, then import it. check_call raises if pip fails.
pip_install_cmd = [sys.executable, "-m", "pip", "install", "-q", "openai>=1.0.0"]
subprocess.check_call(pip_install_cmd)

from openai import OpenAI

# The API key and model name are read from the environment; the model falls
# back to gpt-4o-mini when OPENAI_MODEL is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

# A missing or empty key ends the run with exit status 1.
if not OPENAI_API_KEY:
    print("ABORT: OPENAI_API_KEY env var not set")
    sys.exit(1)

client = OpenAI(api_key=OPENAI_API_KEY)

# User message sent in both structured-output requests.
PROMPT = "Extract: name='Alice', age=30, is_active=true. Return as JSON."

# Method 1: json_schema mode (strict, recommended for gpt-4o+)
# FOOTGUN: json_schema requires "strict": true and every property listed in "required"
# FOOTGUN: does NOT work with older models like gpt-3.5-turbo
#
# Outcome flags:
#   json_schema_ok   - the request succeeded and the reply parsed as JSON
#   schema_validated - the parsed reply holds exactly the expected field values
json_schema_ok = False
schema_validated = False

# Built from constants only, so constructing it outside the try cannot raise.
person_response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "person",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"},
                "is_active": {"type": "boolean"},
            },
            "required": ["name", "age", "is_active"],
            "additionalProperties": False,
        },
    },
}

try:
    response = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": "Extract structured data from the user message."},
            {"role": "user", "content": PROMPT},
        ],
        response_format=person_response_format,
        max_tokens=64,
    )
    first_choice = response.choices[0]
    content = first_choice.message.content
    parsed = json.loads(content)

    # Parsing succeeded; record that before checking the individual values so
    # a failure further down still leaves this flag set.
    json_schema_ok = True

    field_checks = (
        parsed.get("name") == "Alice",
        parsed.get("age") == 30,
        parsed.get("is_active") is True,
    )
    schema_validated = all(field_checks)
    print(f"  json_schema mode: {parsed}")
except Exception as e:
    # Any failure (API error, unparseable reply, unexpected shape) is reported
    # and the script moves on to the next method.
    print(f"  json_schema mode failed: {e}")

# Method 2: json_object mode (looser, works with more models)
# FOOTGUN: the word "JSON" must appear in the prompt or system message — otherwise API error
# FOOTGUN: json_object does NOT validate against a schema — the model may return any JSON shape
#
# json_object_ok ends up True only when the reply parses to a non-empty dict.
json_object_ok = False

try:
    response2 = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": "Return valid JSON only."},
            {"role": "user", "content": PROMPT},
        ],
        response_format={"type": "json_object"},
        max_tokens=64,
    )
    first_choice2 = response2.choices[0]
    content2 = first_choice2.message.content
    parsed2 = json.loads(content2)
    # No schema is enforced in this mode, so only the overall shape is checked.
    json_object_ok = isinstance(parsed2, dict) and bool(parsed2)
    print(f"  json_object mode: {parsed2}")
except Exception as e:
    # Any failure is reported here; the flag keeps whatever value it had.
    print(f"  json_object mode failed: {e}")

# ─────────────────────────────────────────
# POST_EXECUTION
# ─────────────────────────────────────────

# The run only passes when json_object mode produced a usable result.
# An explicit raise is used instead of an `assert` statement because asserts
# are removed when Python runs with -O, which would let a failed run fall
# through and print "PASS". The exception type and message are unchanged.
if not json_object_ok:
    raise AssertionError("FAIL: json_object mode did not return parseable JSON dict")

# Summary of all three outcome flags, printed as JSON. The json_schema flags
# are reported but do not affect pass/fail.
result = {
    "json_schema_ok": json_schema_ok,
    "json_object_ok": json_object_ok,
    "schema_validated": schema_validated,
}
print(json.dumps(result, indent=2))
print("PASS")