{"id":"openai-streaming-response","version":"1.0.0","primitive":"code_execution","description":"Stream a chat completion response from OpenAI and correctly accumulate chunks avoiding common chunk handling footguns","registry_refs":["openai"],"tags":[],"solves":[],"auth_required":true,"verified":false,"last_verified":null,"next_check":"2026-07-30","eval_result":null,"eval_env":null,"mast":["FM-1.1","FM-3.3"],"ref":"https://arxiv.org/abs/2503.13657","inputs":[{"name":"OPENAI_API_KEY","required":true,"description":"OpenAI API key starting with sk-"},{"name":"OPENAI_MODEL","default":"gpt-4o-mini","required":false,"description":"Model to use for streaming completion"}],"executable":"# ============================================\n# checklist:     openai-streaming-response\n# version:       1.0.0\n# primitive:     code_execution\n# description:   Stream a chat completion response from OpenAI and correctly accumulate chunks avoiding common chunk handling footguns\n# registry_refs: openai\n# auth_required: true\n# verified:      false\n# last_verified: null\n# next_check:    2026-07-30\n# eval_result:   null\n# eval_env:      null\n#\n# inputs:\n#   - name: OPENAI_API_KEY\n#     required: true\n#     description: OpenAI API key starting with sk-\n#   - name: OPENAI_MODEL\n#     required: false\n#     default: \"gpt-4o-mini\"\n#     description: Model to use for streaming completion\n#\n# OUTPUTS:\n#   chunks_received   — number of chunks received from stream\n#   content           — fully accumulated response text\n#   stream_ok         — true if streaming worked correctly\n#\n# MAST FAILURE MODES ADDRESSED:\n# FM-1.1 Disobey Task Specification        — chunk.choices[0].delta.content can be None, must guard\n# FM-3.3 Incorrect Verification            — accumulated content verified against expected output\n#\n# ref: https://arxiv.org/abs/2503.13657\n# ============================================\n\nimport sys\nimport os\nimport subprocess\nimport time\nimport urllib.request\nimport json\n\n# ─────────────────────────────────────────\n# PRE_EXECUTION\n# ─────────────────────────────────────────\n\nfor attempt in range(2):\n    try:\n        req = urllib.request.Request(\n            \"https://checklist.day/api/registry/openai\",\n            headers={\"User-Agent\": \"checklist-agent/1.0\"}\n        )\n        with urllib.request.urlopen(req, timeout=10) as resp:\n            registry = json.loads(resp.read())\n            break\n    except Exception as e:\n        if attempt == 1:\n            print(f\"ABORT: registry unreachable — {e}\")\n            sys.exit(1)\n        time.sleep(2)\n\nwarnings = registry.get(\"warnings\", [])\nif warnings:\n    print(\"[openai] WARNINGS:\")\n    for w in warnings if isinstance(warnings, list) else [warnings]:\n        print(f\"  ⚠ {w}\")\n\n# ─────────────────────────────────────────\n# EXECUTION\n# ─────────────────────────────────────────\n\nsubprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"openai>=1.0.0\"])\n\nfrom openai import OpenAI\n\nOPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\")\nOPENAI_MODEL   = os.environ.get(\"OPENAI_MODEL\", \"gpt-4o-mini\")\n\nif not OPENAI_API_KEY:\n    print(\"ABORT: OPENAI_API_KEY env var not set\")\n    sys.exit(1)\n\nclient = OpenAI(api_key=OPENAI_API_KEY)\n\nchunks_received = 0\naccumulated = []\n\n# FOOTGUN: stream=True returns a generator, not a response object\n# FOOTGUN: chunk.choices[0].delta.content is None on first and last chunks — must guard with `or \"\"`\n# FOOTGUN: usage is None on all chunks except the last when stream_options={\"include_usage\": True}\nwith client.chat.completions.create(\n    model=OPENAI_MODEL,\n    messages=[\n        {\"role\": \"user\", \"content\": \"Count from 1 to 5, one number per line, nothing else.\"},\n    ],\n    max_tokens=32,\n    temperature=0,\n    stream=True,\n) as stream:\n    for chunk in stream:\n        chunks_received += 1\n        # FOOTGUN: always guard — delta.content is None on role/finish chunks\n        delta = chunk.choices[0].delta.content if chunk.choices else None\n        if delta:\n            accumulated.append(delta)\n            print(delta, end=\"\", flush=True)\n\nprint()  # newline after stream\n\ncontent  = \"\".join(accumulated)\nstream_ok = chunks_received > 1 and len(content) > 0\n\nprint(f\"  chunks received: {chunks_received}\")\nprint(f\"  content length:  {len(content)} chars\")\n\n# ─────────────────────────────────────────\n# POST_EXECUTION\n# ─────────────────────────────────────────\n\nassert stream_ok, f\"FAIL: streaming failed — chunks={chunks_received}, content={content!r}\"\nassert chunks_received > 1, \"FAIL: expected multiple chunks, got 1 (not streaming?)\"\n\n# Verify numbers 1-5 are present in output\nfor n in [\"1\", \"2\", \"3\", \"4\", \"5\"]:\n    assert n in content, f\"FAIL: expected '{n}' in streamed content, got: {content!r}\"\n\nresult = {\n    \"chunks_received\": chunks_received,\n    \"content\":         content.strip(),\n    \"stream_ok\":       stream_ok,\n}\nprint(json.dumps(result, indent=2))\nprint(\"PASS\")\n"}