shell-command-execution

code_execution · verified · null · json · download .py

Execute shell commands safely, capture output, and handle errors

import sys
import subprocess
import shlex

# ----------------------------------------
# PRE_EXECUTION
# no registry fetch — stdlib only
# validate environment basics
# ----------------------------------------

# sanity check: the interpreter path must be known before anything else runs
assert sys.executable, "ABORT: sys.executable is empty"

# one call, two lines of output: the status line, then the interpreter path
print(
    "PRE_EXECUTION: environment verified ✓",
    f"  python: {sys.executable}",
    sep="\n",
)

# ----------------------------------------
# EXECUTION
# FM-1.1: shell=False — always pass list, never string
#         shell=True opens injection vulnerabilities
#         and behaves differently across OS
# FM-1.5: timeout always set — never omit
# FM-2.6: capture_output=True — always capture
#         don't let output bleed to terminal silently
# ----------------------------------------

# 1. simple command — echo
# FM-1.1: argv is a list and no shell is involved, so `echo` has to exist
#         as a real executable on PATH
echo_argv = ["echo", "hello world"]
result = subprocess.run(
    echo_argv,
    timeout=10,            # FM-1.5: never wait unbounded
    text=True,
    capture_output=True,
)

assert result.returncode == 0, (
    f"FAIL: echo returned {result.returncode}"
)
assert "hello world" in result.stdout, (
    f"FAIL: expected 'hello world' in stdout, got '{result.stdout}'"
)

print("EXECUTION: echo command ✓")

# 2. command with arguments — python version check
# re-invoke the running interpreter and ask it for its version banner
version_argv = [sys.executable, "--version"]
result = subprocess.run(version_argv, timeout=10, text=True, capture_output=True)

assert result.returncode == 0, (
    f"FAIL: python --version returned {result.returncode}"
)

# the banner may arrive on stdout or on stderr depending on the Python
# version, so both streams are inspected together
version_output = "".join((result.stdout, result.stderr))
assert "Python" in version_output, (
    f"FAIL: expected 'Python' in output, got '{version_output}'"
)

print(f"EXECUTION: version check ✓ ({version_output.strip()})")

# 3. command that fails — verify error handling
# FM-2.6: check=False, so a non-zero exit is inspected here instead of raising
failing_argv = [sys.executable, "-c", "import nonexistent_module_xyz"]
result = subprocess.run(
    failing_argv,
    check=False,
    timeout=10,
    text=True,
    capture_output=True,
)

assert result.returncode != 0, (
    "FAIL: bad import should have failed"
)
# either exception name in the child's traceback counts as the expected failure
assert any(
    marker in result.stderr
    for marker in ("ModuleNotFoundError", "ImportError")
), f"FAIL: expected import error in stderr, got '{result.stderr}'"

print("EXECUTION: error handling verified ✓")

# 4. run inline python — common agent pattern
# the snippet is handed to the interpreter via -c as a single argv entry
inline_argv = [sys.executable, "-c", "print('agent output')"]
result = subprocess.run(inline_argv, timeout=10, text=True, capture_output=True)

assert result.returncode == 0, (
    f"FAIL: inline python returned {result.returncode}"
)
assert "agent output" in result.stdout, (
    f"FAIL: expected 'agent output', got '{result.stdout}'"
)

print("EXECUTION: inline python execution ✓")

# 5. timeout enforcement — FM-1.5
# The child would sleep for 60 s; the 2 s timeout must interrupt it first,
# which subprocess.run reports by raising TimeoutExpired.
try:
    subprocess.run(
        [sys.executable, "-c", "import time; time.sleep(60)"],
        capture_output=True,
        text=True,
        timeout=2,   # short timeout — should fire
    )
except subprocess.TimeoutExpired:
    pass  # correct — timeout fired as expected
else:
    # Explicit raise rather than `assert False`: assert statements are
    # removed under `python -O`, which would let a missing timeout pass
    # silently and still print the "verified" line below.
    raise AssertionError("FAIL: timeout should have fired")

print("EXECUTION: timeout enforcement verified ✓")

# ----------------------------------------
# POST_EXECUTION
# FM-3.2: structured result pattern
# FM-3.3: verify all fields present and correct
# ----------------------------------------

# structured result — what an agent should return
def run_command(cmd_list, timeout=10):
    """Run a command without a shell and report the outcome as a dict.

    The command is executed via ``subprocess.run`` with captured text output,
    ``check=False`` and a timeout. Every outcome is reported through the same
    dict shape, including the cases where no process could be started (empty
    command, missing or non-executable program), so callers can branch on
    ``success`` without wrapping the call in their own ``try``.

    Args:
        cmd_list: Argument vector; the first item is the program to run.
        timeout: Seconds to wait before the run is abandoned.

    Returns:
        A dict with the keys ``success`` (bool), ``returncode`` (int, or
        ``None`` when the process did not run to completion), ``stdout`` and
        ``stderr`` (str, surrounding whitespace stripped) and ``timed_out``
        (bool).
    """
    if not cmd_list:
        # Nothing to execute; report it instead of letting subprocess fail
        # on the missing program name.
        return {
            "success": False,
            "returncode": None,
            "stdout": "",
            "stderr": "empty command",
            "timed_out": False,
        }

    try:
        completed = subprocess.run(
            cmd_list,
            capture_output=True,
            text=True,
            timeout=timeout,
            check=False,   # non-zero exits are reported, not raised
        )
    except subprocess.TimeoutExpired:
        return {
            "success": False,
            "returncode": None,
            "stdout": "",
            "stderr": "command timed out",
            "timed_out": True,
        }
    except OSError as exc:
        # The program could not be started at all (e.g. FileNotFoundError,
        # PermissionError); surface the reason in stderr.
        return {
            "success": False,
            "returncode": None,
            "stdout": "",
            "stderr": str(exc),
            "timed_out": False,
        }

    return {
        "success": completed.returncode == 0,
        "returncode": completed.returncode,
        "stdout": completed.stdout.strip(),
        "stderr": completed.stderr.strip(),
        "timed_out": False,
    }

# verify the structured runner
# a successful echo must come back with every field set as expected
output = run_command(["echo", "hello world"])

assert output["success"] is True, (
    f"FAIL: expected success=True, got {output}"
)
assert output["returncode"] == 0, (
    f"FAIL: expected returncode=0, got {output['returncode']}"
)
assert output["stdout"] == "hello world", (
    f"FAIL: expected 'hello world', got '{output['stdout']}'"
)
assert output["timed_out"] is False, (
    "FAIL: timed_out should be False"
)

# leading newline reproduces the blank separator line before the report
print("\nPOST_EXECUTION: structured result verified ✓")
print(f"POST_EXECUTION: output = {output}")

# final summary: overall status plus one flag per check performed above
result = dict(
    status="pass",
    echo_verified=True,
    version_check_verified=True,
    error_handling_verified=True,
    inline_python_verified=True,
    timeout_verified=True,
    structured_result_verified=True,
)
print(result)
print("PASS")