{"id":"shell-command-execution","version":"1.0.0","primitive":"code_execution","description":"Execute shell commands safely, capture output, and handle errors","registry_refs":["none (stdlib only)"],"tags":["subprocess","shell","cli","command-execution","timeout","stdlib","security"],"solves":["shell injection via shell=True","infinite hangs without timeout","swallowed stderr"],"auth_required":false,"verified":true,"last_verified":"2026-04-13","next_check":"2026-07-13","eval_result":"null","eval_env":"null","mast":[],"ref":"https://arxiv.org/abs/2503.13657","inputs":[],"executable":"# ============================================\n# checklist:     shell-command-execution\n# version:       1.0.0\n# primitive:     code_execution\n# description:   Execute shell commands safely, capture output, and handle errors\n# registry_refs: none (stdlib only)\n# auth_required: false\n# verified:      true\n# last_verified: 2026-04-13\n# next_check:    2026-07-13\n# eval_result:   null\n# eval_env:      null\n#\n# MAST FAILURE MODES ADDRESSED:\n# FM-1.1 Disobey Task Specification    — shell=False enforced, no shell injection\n# FM-1.5 Unaware of Termination Conditions — timeout always set, no infinite hang\n# FM-2.6 Reasoning-Action Mismatch     — check=False + returncode check, not bare run()\n# FM-3.2 No or Incomplete Verification — stdout, stderr, returncode all verified\n# FM-3.3 Incorrect Verification        — assert output content, not just \"ran\"\n#\n# tags:   subprocess, shell, cli, command-execution, timeout, stdlib, security\n# solves: shell injection via shell=True, infinite hangs without timeout, swallowed stderr\n# ref: https://arxiv.org/abs/2503.13657\n#\n# INPUTS:\n#   TIMEOUT_SECONDS — int, intended default timeout for subprocess calls (default: 10)\n#                     NOTE: not read by this script; each call hard-codes its timeout\n#\n# OUTPUTS:\n#   echo_verified              — bool, echo command ran and output matched\n#   version_check_verified     — bool, python --version returned \"Python\"\n#   error_handling_verified    — bool, failed import 
returned non-zero returncode\n#   inline_python_verified     — bool, python -c produced expected output\n#   timeout_verified           — bool, TimeoutExpired fired on long-running command\n#   structured_result_verified — bool, run_command() returns correct dict shape\n# ============================================\n\nimport sys\nimport subprocess\nimport shlex\n\n# ----------------------------------------\n# PRE_EXECUTION\n# no registry fetch — stdlib only\n# validate environment basics\n# ----------------------------------------\n\n# verify the interpreter path is known (sys.executable is non-empty)\nassert sys.executable, \"ABORT: sys.executable is empty\"\n\nprint(\"PRE_EXECUTION: environment verified ✓\")\nprint(f\"  python: {sys.executable}\")\n\n# ----------------------------------------\n# EXECUTION\n# FM-1.1: shell=False — always pass list, never string\n#         shell=True opens injection vulnerabilities\n#         and behaves differently across OS\n# FM-1.5: timeout always set — never omit\n# FM-2.6: capture_output=True — always capture\n#         don't let output bleed to terminal silently\n# ----------------------------------------\n\n# 1. simple command — echo\nresult = subprocess.run(\n    [\"echo\", \"hello world\"],   # FM-1.1: list, not string\n    capture_output=True,\n    text=True,\n    timeout=10                 # FM-1.5: always set timeout\n)\n\nassert result.returncode == 0, \\\n    f\"FAIL: echo returned {result.returncode}\"\nassert \"hello world\" in result.stdout, \\\n    f\"FAIL: expected 'hello world' in stdout, got '{result.stdout}'\"\n\nprint(\"EXECUTION: echo command ✓\")\n\n# 2. 
command with arguments — python version check\nresult = subprocess.run(\n    [sys.executable, \"--version\"],\n    capture_output=True,\n    text=True,\n    timeout=10\n)\n\nassert result.returncode == 0, \\\n    f\"FAIL: python --version returned {result.returncode}\"\n\n# python --version outputs to stdout or stderr depending on version\nversion_output = result.stdout + result.stderr\nassert \"Python\" in version_output, \\\n    f\"FAIL: expected 'Python' in output, got '{version_output}'\"\n\nprint(f\"EXECUTION: version check ✓ ({version_output.strip()})\")\n\n# 3. command that fails — verify error handling\n# FM-2.6: check=False so we handle returncode ourselves\nresult = subprocess.run(\n    [sys.executable, \"-c\", \"import nonexistent_module_xyz\"],\n    capture_output=True,\n    text=True,\n    timeout=10,\n    check=False    # FM-2.6: don't raise, inspect returncode\n)\n\nassert result.returncode != 0, \\\n    \"FAIL: bad import should have failed\"\nassert \"ModuleNotFoundError\" in result.stderr or \"ImportError\" in result.stderr, \\\n    f\"FAIL: expected import error in stderr, got '{result.stderr}'\"\n\nprint(\"EXECUTION: error handling verified ✓\")\n\n# 4. run inline python — common agent pattern\nresult = subprocess.run(\n    [sys.executable, \"-c\", \"print('agent output')\"],\n    capture_output=True,\n    text=True,\n    timeout=10\n)\n\nassert result.returncode == 0, \\\n    f\"FAIL: inline python returned {result.returncode}\"\nassert \"agent output\" in result.stdout, \\\n    f\"FAIL: expected 'agent output', got '{result.stdout}'\"\n\nprint(\"EXECUTION: inline python execution ✓\")\n\n# 5. 
timeout enforcement — FM-1.5\n# command that would hang — verify timeout fires\ntry:\n    subprocess.run(\n        [sys.executable, \"-c\", \"import time; time.sleep(60)\"],\n        capture_output=True,\n        text=True,\n        timeout=2    # short timeout — should fire\n    )\n    assert False, \"FAIL: timeout should have fired\"\nexcept subprocess.TimeoutExpired:\n    pass  # correct — timeout fired as expected\n\nprint(\"EXECUTION: timeout enforcement verified ✓\")\n\n# ----------------------------------------\n# POST_EXECUTION\n# FM-3.2: structured result pattern\n# FM-3.3: verify all fields present and correct\n# ----------------------------------------\n\n# structured result — what an agent should return\ndef run_command(cmd_list, timeout=10):\n    \"\"\"Safe command runner — always returns structured result.\"\"\"\n    try:\n        result = subprocess.run(\n            cmd_list,\n            capture_output=True,\n            text=True,\n            timeout=timeout,\n            check=False\n        )\n        return {\n            \"success\": result.returncode == 0,\n            \"returncode\": result.returncode,\n            \"stdout\": result.stdout.strip(),\n            \"stderr\": result.stderr.strip(),\n            \"timed_out\": False,\n        }\n    except subprocess.TimeoutExpired:\n        return {\n            \"success\": False,\n            \"returncode\": None,\n            \"stdout\": \"\",\n            \"stderr\": \"command timed out\",\n            \"timed_out\": True,\n        }\n\n# verify the structured runner\noutput = run_command([\"echo\", \"hello world\"])\n\nassert output[\"success\"] is True, \\\n    f\"FAIL: expected success=True, got {output}\"\nassert output[\"returncode\"] == 0, \\\n    f\"FAIL: expected returncode=0, got {output['returncode']}\"\nassert output[\"stdout\"] == \"hello world\", \\\n    f\"FAIL: expected 'hello world', got '{output['stdout']}'\"\nassert output[\"timed_out\"] is False, \\\n    \"FAIL: 
timed_out should be False\"\n\nprint()\nprint(\"POST_EXECUTION: structured result verified ✓\")\nprint(f\"POST_EXECUTION: output = {output}\")\n\nresult = {\n    \"status\": \"pass\",\n    \"echo_verified\": True,\n    \"version_check_verified\": True,\n    \"error_handling_verified\": True,\n    \"inline_python_verified\": True,\n    \"timeout_verified\": True,\n    \"structured_result_verified\": True,\n}\nprint(result)\nprint(\"PASS\")\n"}