{"id":"s3-download-file","version":"1.0.0","primitive":"code_execution","description":"Download a file from S3 using download_file and get_object, and handle the StreamingBody footgun","registry_refs":["boto3"],"tags":[],"solves":[],"auth_required":true,"verified":false,"last_verified":"null","next_check":"2026-07-30","eval_result":"null","eval_env":"null","mast":[],"ref":"https://arxiv.org/abs/2503.13657","inputs":[{"name":"AWS_ACCESS_KEY_ID","required":true,"description":"AWS access key ID"},{"name":"AWS_SECRET_ACCESS_KEY","required":true,"description":"AWS secret access key"},{"name":"S3_BUCKET","required":true,"description":"S3 bucket name to download from"},{"name":"AWS_REGION","default":"us-east-1","required":false,"description":"AWS region"}],"executable":"# ============================================\n# checklist:     s3-download-file\n# version:       1.0.0\n# primitive:     code_execution\n# description:   Download a file from S3 using download_file and get_object, and handle the StreamingBody footgun\n# registry_refs: boto3\n# auth_required: true\n# verified:      false\n# last_verified: null\n# next_check:    2026-07-30\n# eval_result:   null\n# eval_env:      null\n#\n# inputs:\n#   - name: AWS_ACCESS_KEY_ID\n#     required: true\n#     description: AWS access key ID\n#   - name: AWS_SECRET_ACCESS_KEY\n#     required: true\n#     description: AWS secret access key\n#   - name: S3_BUCKET\n#     required: true\n#     description: S3 bucket name to download from\n#   - name: AWS_REGION\n#     required: false\n#     default: \"us-east-1\"\n#     description: AWS region\n#\n# OUTPUTS:\n#   download_file_ok   — true if download_file wrote correct content\n#   get_object_ok      — true if get_object returned correct content\n#   content_match      — true if both methods returned identical content\n#\n# MAST FAILURE MODES ADDRESSED:\n# FM-1.1 Disobey Task Specification        — StreamingBody.read() called explicitly\n# FM-3.3 Incorrect Verification            — content verified byte-for-byte, not just size\n#\n# ref: 
https://arxiv.org/abs/2503.13657\n# ============================================\n\nimport sys\nimport os\nimport subprocess\nimport time\nimport tempfile\nimport urllib.request\nimport json\n\n# ─────────────────────────────────────────\n# PRE_EXECUTION\n# ─────────────────────────────────────────\n\nfor attempt in range(2):\n    try:\n        req = urllib.request.Request(\n            \"https://checklist.day/api/registry/boto3\",\n            headers={\"User-Agent\": \"checklist-agent/1.0\"}\n        )\n        with urllib.request.urlopen(req, timeout=10) as resp:\n            registry = json.loads(resp.read())\n            break\n    except Exception as e:\n        if attempt == 1:\n            print(f\"ABORT: registry unreachable — {e}\")\n            sys.exit(1)\n        time.sleep(2)\n\nwarnings = registry.get(\"warnings\", [])\nif warnings:\n    print(\"[boto3] WARNINGS:\")\n    for w in warnings if isinstance(warnings, list) else [warnings]:\n        print(f\"  ⚠ {w}\")\n\n# ─────────────────────────────────────────\n# EXECUTION\n# ─────────────────────────────────────────\n\nsubprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"boto3>=1.26.0\"])\n\nimport boto3\n\nAWS_ACCESS_KEY_ID     = os.environ.get(\"AWS_ACCESS_KEY_ID\")\nAWS_SECRET_ACCESS_KEY = os.environ.get(\"AWS_SECRET_ACCESS_KEY\")\nAWS_REGION            = os.environ.get(\"AWS_REGION\", \"us-east-1\")\nS3_BUCKET             = os.environ.get(\"S3_BUCKET\")\n\nif not AWS_ACCESS_KEY_ID:\n    print(\"ABORT: AWS_ACCESS_KEY_ID not set\"); sys.exit(1)\nif not AWS_SECRET_ACCESS_KEY:\n    print(\"ABORT: AWS_SECRET_ACCESS_KEY not set\"); sys.exit(1)\nif not S3_BUCKET:\n    print(\"ABORT: S3_BUCKET not set\"); sys.exit(1)\n\nKEY           = \"checklist-test/download-test.txt\"\nEXPECTED      = b\"checklist.day s3-download-file test\\n\"\n\nclient = boto3.client(\n    \"s3\",\n    aws_access_key_id=AWS_ACCESS_KEY_ID,\n    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,\n    
region_name=AWS_REGION,\n)\n\n# Setup: upload test object\nclient.put_object(Bucket=S3_BUCKET, Key=KEY, Body=EXPECTED)\nprint(f\"  uploaded test object: {KEY}\")\n\ntry:\n    # Method 1: download_file — writes directly to disk, handles multipart\n    with tempfile.NamedTemporaryFile(delete=False, suffix=\".txt\") as f:\n        tmp_path = f.name\n\n    client.download_file(S3_BUCKET, KEY, tmp_path)\n    with open(tmp_path, \"rb\") as f:\n        downloaded_content = f.read()\n    os.unlink(tmp_path)\n    download_file_ok = downloaded_content == EXPECTED\n    print(f\"  download_file: {len(downloaded_content)} bytes (match={download_file_ok})\")\n\n    # Method 2: get_object — returns StreamingBody\n    # FOOTGUN: response[\"Body\"] is a StreamingBody, NOT bytes — must call .read()\n    # FOOTGUN: StreamingBody can only be read once — store result immediately\n    response   = client.get_object(Bucket=S3_BUCKET, Key=KEY)\n    body_bytes = response[\"Body\"].read()  # must call .read() explicitly\n    get_object_ok  = body_bytes == EXPECTED\n    content_match  = downloaded_content == body_bytes\n    print(f\"  get_object:    {len(body_bytes)} bytes (match={get_object_ok})\")\n\nfinally:\n    client.delete_object(Bucket=S3_BUCKET, Key=KEY)\n    print(f\"  cleaned up: {KEY}\")\n\n# ─────────────────────────────────────────\n# POST_EXECUTION\n# ─────────────────────────────────────────\n\nassert download_file_ok, \"FAIL: download_file content mismatch\"\nassert get_object_ok, \"FAIL: get_object content mismatch\"\nassert content_match, \"FAIL: download_file and get_object returned different content\"\n\nresult = {\n    \"download_file_ok\": download_file_ok,\n    \"get_object_ok\":    get_object_ok,\n    \"content_match\":    content_match,\n}\nprint(json.dumps(result, indent=2))\nprint(\"PASS\")\n"}