{"id":"postgres-bulk-insert","version":"1.0.0","primitive":"code_execution","description":"Number of rows to insert in the benchmark","registry_refs":["psycopg2-binary"],"tags":[],"solves":[],"auth_required":true,"verified":false,"last_verified":"null","next_check":"2026-07-30","eval_result":"null","eval_env":"null","mast":[],"ref":"https://arxiv.org/abs/2503.13657","inputs":[{"name":"DATABASE_URL","required":true,"description":"PostgreSQL connection string e.g. \"postgresql://user:pass@host:5432/dbname\""},{"name":"ROW_COUNT","default":"1000","required":false,"description":"Number of rows to insert in the benchmark"}],"executable":"# ============================================\n# checklist:     postgres-bulk-insert\n# version:       1.0.0\n# primitive:     code_execution\n# description:   Bulk insert rows into PostgreSQL using copy_expert (fast) vs executemany (safe) with tradeoff guidance\n# registry_refs: psycopg2-binary\n# auth_required: true\n# verified:      false\n# last_verified: null\n# next_check:    2026-07-30\n# eval_result:   null\n# eval_env:      null\n#\n# inputs:\n#   - name: DATABASE_URL\n#     required: true\n#     description: PostgreSQL connection string e.g. \"postgresql://user:pass@host:5432/dbname\"\n#   - name: ROW_COUNT\n#     required: false\n#     default: \"100\"\n#     description: Number of rows to insert in the benchmark\n#\n# OUTPUTS:\n#   row_count          — number of rows inserted\n#   executemany_ms     — time for executemany in milliseconds\n#   copy_expert_ms     — time for copy_expert in milliseconds\n#   copy_speedup       — how many times faster copy_expert was\n#   recommendation     — which method to use based on row count\n#\n# MAST FAILURE MODES ADDRESSED:\n# FM-1.1 Disobey Task Specification        — both methods verified to insert correct row count\n# FM-3.2 No or Incomplete Verification     — row counts verified after each method\n# FM-3.3 Incorrect Verification            — timing compared, not assumed\n#\n# ref: https://arxiv.org/abs/2503.13657\n# ============================================\n\nimport sys\nimport os\nimport subprocess\nimport time\nimport urllib.request\nimport json\nimport io\n\n# ─────────────────────────────────────────\n# PRE_EXECUTION\n# ─────────────────────────────────────────\n\nfor attempt in range(2):\n    try:\n        req = urllib.request.Request(\n            \"https://checklist.day/api/registry/psycopg2-binary\",\n            headers={\"User-Agent\": \"checklist-agent/1.0\"}\n        )\n        with urllib.request.urlopen(req, timeout=10) as resp:\n            registry = json.loads(resp.read())\n            break\n    except Exception as e:\n        if attempt == 1:\n            print(f\"ABORT: registry unreachable — {e}\")\n            sys.exit(1)\n        time.sleep(2)\n\nwarnings = registry.get(\"warnings\", [])\nif warnings:\n    print(\"[psycopg2-binary] WARNINGS:\")\n    for w in warnings if isinstance(warnings, list) else [warnings]:\n        print(f\"  ⚠ {w}\")\n\n# ─────────────────────────────────────────\n# EXECUTION\n# ─────────────────────────────────────────\n\nsubprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"psycopg2-binary>=2.9.9\"])\n\nimport psycopg2\n\nDATABASE_URL = os.environ.get(\"DATABASE_URL\")\nROW_COUNT    = int(os.environ.get(\"ROW_COUNT\", \"100\"))\n\nif not DATABASE_URL:\n    print(\"ABORT: DATABASE_URL env var not set\")\n    sys.exit(1)\n\nif DATABASE_URL.startswith(\"postgres://\"):\n    DATABASE_URL = DATABASE_URL.replace(\"postgres://\", \"postgresql://\", 1)\n\nTABLE = \"checklist_bulk_test\"\nrows = [(f\"item_{i}\", i) for i in range(ROW_COUNT)]\n\nconn = psycopg2.connect(DATABASE_URL, connect_timeout=10)\n\ntry:\n    with conn.cursor() as cur:\n        cur.execute(f\"\"\"\n            CREATE TABLE IF NOT EXISTS {TABLE} (\n                id    SERIAL PRIMARY KEY,\n                name  TEXT NOT NULL,\n                value INTEGER NOT NULL\n            )\n        \"\"\")\n        conn.commit()\n\n        # Method 1: executemany\n        # FOOTGUN: executemany is safe but slow — one round-trip per row above psycopg2 3.x\n        # Use for < 1000 rows or when values need per-row validation\n        cur.execute(f\"DELETE FROM {TABLE}\")\n        conn.commit()\n\n        t0 = time.perf_counter()\n        cur.executemany(f\"INSERT INTO {TABLE} (name, value) VALUES (%s, %s)\", rows)\n        conn.commit()\n        executemany_ms = round((time.perf_counter() - t0) * 1000, 1)\n\n        cur.execute(f\"SELECT COUNT(*) FROM {TABLE}\")\n        executemany_count = cur.fetchone()[0]\n        print(f\"  executemany: {executemany_count} rows in {executemany_ms}ms\")\n\n        # Method 2: copy_expert\n        # FOOTGUN: copy_expert is 10-100x faster but requires CSV-formatted StringIO\n        # Use for > 1000 rows when speed matters\n        cur.execute(f\"DELETE FROM {TABLE}\")\n        conn.commit()\n\n        csv_data = io.StringIO()\n        for name, value in rows:\n            csv_data.write(f\"{name}\\t{value}\\n\")\n        csv_data.seek(0)\n\n        t0 = time.perf_counter()\n        cur.copy_expert(f\"COPY {TABLE} (name, value) FROM STDIN\", csv_data)\n        conn.commit()\n        copy_expert_ms = round((time.perf_counter() - t0) * 1000, 1)\n\n        cur.execute(f\"SELECT COUNT(*) FROM {TABLE}\")\n        copy_count = cur.fetchone()[0]\n        print(f\"  copy_expert: {copy_count} rows in {copy_expert_ms}ms\")\n\n        # Cleanup\n        cur.execute(f\"DROP TABLE IF EXISTS {TABLE}\")\n        conn.commit()\n\nfinally:\n    conn.close()\n\n# ─────────────────────────────────────────\n# POST_EXECUTION\n# ─────────────────────────────────────────\n\nassert executemany_count == ROW_COUNT, f\"FAIL: executemany inserted {executemany_count}, expected {ROW_COUNT}\"\nassert copy_count == ROW_COUNT, f\"FAIL: copy_expert inserted {copy_count}, expected {ROW_COUNT}\"\n\ncopy_speedup = round(executemany_ms / copy_expert_ms, 1) if copy_expert_ms > 0 else 0\nrecommendation = \"copy_expert\" if ROW_COUNT >= 1000 else \"executemany\"\n\nresult = {\n    \"row_count\":       ROW_COUNT,\n    \"executemany_ms\":  executemany_ms,\n    \"copy_expert_ms\":  copy_expert_ms,\n    \"copy_speedup\":    copy_speedup,\n    \"recommendation\":  recommendation,\n}\nprint(json.dumps(result, indent=2))\nprint(\"PASS\")\n"}