{"id":"openai-function-calling","version":"1.0.0","primitive":"code_execution","description":"Use OpenAI function calling (tool use) with tool_choice, parse tool call arguments, and handle parallel calls","registry_refs":["openai"],"tags":[],"solves":[],"auth_required":true,"verified":false,"last_verified":"null","next_check":"2026-07-30","eval_result":"null","eval_env":"null","mast":[],"ref":"https://arxiv.org/abs/2503.13657","inputs":[{"name":"OPENAI_API_KEY","required":true,"description":"OpenAI API key starting with sk-"},{"name":"OPENAI_MODEL","default":"gpt-4o-mini","required":false,"description":"Model to use (must support tool use)"}],"executable":"# ============================================\n# checklist:     openai-function-calling\n# version:       1.0.0\n# primitive:     code_execution\n# description:   Use OpenAI function calling (tool use) with tool_choice, parse tool call arguments, and handle parallel calls\n# registry_refs: openai\n# auth_required: true\n# verified:      false\n# last_verified: null\n# next_check:    2026-07-30\n# eval_result:   null\n# eval_env:      null\n#\n# inputs:\n#   - name: OPENAI_API_KEY\n#     required: true\n#     description: OpenAI API key starting with sk-\n#   - name: OPENAI_MODEL\n#     required: false\n#     default: \"gpt-4o-mini\"\n#     description: Model to use (must support tool use)\n#\n# OUTPUTS:\n#   tool_called       — name of the tool the model called\n#   arguments_parsed  — true if tool arguments were parsed correctly\n#   finish_reason     — should be \"tool_calls\" not \"stop\"\n#\n# MAST FAILURE MODES ADDRESSED:\n# FM-1.1 Disobey Task Specification        — tool_choice=\"required\" forces tool use\n# FM-2.6 Reasoning-Action Mismatch         — arguments parsed via json.loads, not string match\n# FM-3.3 Incorrect Verification            — finish_reason verified to be \"tool_calls\"\n#\n# ref: https://arxiv.org/abs/2503.13657\n# ============================================\n\nimport sys\nimport os\nimport subprocess\nimport time\nimport urllib.request\nimport json\n\n# 
─────────────────────────────────────────\n# PRE_EXECUTION\n# ─────────────────────────────────────────\n\nfor attempt in range(2):\n    try:\n        req = urllib.request.Request(\n            \"https://checklist.day/api/registry/openai\",\n            headers={\"User-Agent\": \"checklist-agent/1.0\"}\n        )\n        with urllib.request.urlopen(req, timeout=10) as resp:\n            registry = json.loads(resp.read())\n            break\n    except Exception as e:\n        if attempt == 1:\n            print(f\"ABORT: registry unreachable — {e}\")\n            sys.exit(1)\n        time.sleep(2)\n\nwarnings = registry.get(\"warnings\", [])\nif warnings:\n    print(\"[openai] WARNINGS:\")\n    for w in warnings if isinstance(warnings, list) else [warnings]:\n        print(f\"  ⚠ {w}\")\n\n# ─────────────────────────────────────────\n# EXECUTION\n# ─────────────────────────────────────────\n\nsubprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"openai>=1.0.0\"])\n\nfrom openai import OpenAI\n\nOPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\")\nOPENAI_MODEL   = os.environ.get(\"OPENAI_MODEL\", \"gpt-4o-mini\")\n\nif not OPENAI_API_KEY:\n    print(\"ABORT: OPENAI_API_KEY env var not set\")\n    sys.exit(1)\n\nclient = OpenAI(api_key=OPENAI_API_KEY)\n\ntools = [\n    {\n        \"type\": \"function\",\n        \"function\": {\n            \"name\": \"get_weather\",\n            \"description\": \"Get the weather for a city\",\n            \"parameters\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"city\":  {\"type\": \"string\", \"description\": \"City name\"},\n                    \"units\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"Temperature units\"},\n                },\n                \"required\": [\"city\"],\n            },\n        },\n    }\n]\n\nresponse = client.chat.completions.create(\n    model=OPENAI_MODEL,\n    
messages=[{\"role\": \"user\", \"content\": \"What's the weather in Paris?\"}],\n    tools=tools,\n    # FOOTGUN: tool_choice=\"auto\" lets model skip tools — use \"required\" to force a tool call\n    tool_choice=\"required\",\n)\n\n# FOOTGUN: finish_reason is \"tool_calls\" not \"stop\" when model called a tool\nfinish_reason = response.choices[0].finish_reason\nmessage       = response.choices[0].message\n\n# FOOTGUN: tool calls are in message.tool_calls, not message.content\ntool_calls = message.tool_calls\nassert tool_calls, \"FAIL: no tool calls in response\"\n\ntool_call         = tool_calls[0]\ntool_called       = tool_call.function.name\n\n# FOOTGUN: arguments is a JSON string, not a dict — must parse with json.loads\nraw_args          = tool_call.function.arguments\narguments         = json.loads(raw_args)\narguments_parsed  = \"city\" in arguments\n\nprint(f\"  tool called:   {tool_called}\")\nprint(f\"  arguments:     {arguments}\")\nprint(f\"  finish_reason: {finish_reason}\")\n\n# ─────────────────────────────────────────\n# POST_EXECUTION\n# ─────────────────────────────────────────\n\nassert tool_called == \"get_weather\", f\"FAIL: expected 'get_weather', got '{tool_called}'\"\nassert arguments_parsed, f\"FAIL: 'city' not in parsed arguments: {arguments}\"\nassert finish_reason == \"tool_calls\", f\"FAIL: expected finish_reason='tool_calls', got '{finish_reason}'\"\nassert \"paris\" in arguments.get(\"city\", \"\").lower(), f\"FAIL: expected city=Paris, got {arguments.get('city')}\"\n\nresult = {\n    \"tool_called\":      tool_called,\n    \"arguments\":        arguments,\n    \"arguments_parsed\": arguments_parsed,\n    \"finish_reason\":    finish_reason,\n}\nprint(json.dumps(result, indent=2))\nprint(\"PASS\")\n"}