{"id":146,"library":"cerebras-cloud-sdk","title":"Cerebras Cloud SDK","description":"Official Python SDK for the Cerebras Cloud inference API. Provides access to ultra-fast LLM inference on Cerebras Wafer-Scale Engine hardware. OpenAI-compatible API surface. Generated with Stainless. Current version: 1.67.0 (Mar 2026). Requires Python 3.9+. Note: separate from cerebras-sdk (PyPI) which is a hardware kernel development tool — completely different product.","status":"active","version":"1.67.0","language":"python","source_language":"en","source_url":"https://github.com/Cerebras/cerebras-cloud-sdk-python","tags":["cerebras","llm","inference","python","fast-inference","llama"],"install":[{"cmd":"pip install cerebras-cloud-sdk","lang":"bash","label":"Python"}],"dependencies":[{"reason":"HTTP client. Installed automatically.","package":"httpx","optional":false},{"reason":"Optional async HTTP backend for improved concurrency. Use instead of default httpx for high-throughput async workloads.","package":"aiohttp","optional":true}],"imports":[{"note":"Import path is 'from cerebras.cloud.sdk import Cerebras' — not 'import cerebras'. 
LLMs hallucinate a top-level cerebras module that does not exist.","wrong":"import cerebras\nclient = cerebras.Client(api_key='...')","symbol":"Cerebras","correct":"from cerebras.cloud.sdk import Cerebras\nimport os\n\nclient = Cerebras(\n    api_key=os.environ.get('CEREBRAS_API_KEY')\n)\n\nresponse = client.chat.completions.create(\n    model='llama3.1-8b',\n    messages=[{'role': 'user', 'content': 'Why is fast inference important?'}]\n)\nprint(response.choices[0].message.content)"},{"note":"Use AsyncCerebras for async code — same module, different class name.","wrong":"from cerebras.cloud.sdk import Cerebras\n# using sync client in async context","symbol":"AsyncCerebras","correct":"from cerebras.cloud.sdk import AsyncCerebras\nimport asyncio, os\n\nclient = AsyncCerebras(\n    api_key=os.environ.get('CEREBRAS_API_KEY')\n)\n\nasync def main():\n    response = await client.chat.completions.create(\n        model='llama3.1-8b',\n        messages=[{'role': 'user', 'content': 'Hello'}]\n    )\n    print(response.choices[0].message.content)\n\nasyncio.run(main())"}],"quickstart":{"code":"# pip install cerebras-cloud-sdk\nfrom cerebras.cloud.sdk import Cerebras\nimport os\n\nclient = Cerebras(\n    api_key=os.environ.get('CEREBRAS_API_KEY')\n)\n\nresponse = client.chat.completions.create(\n    model='llama3.1-8b',\n    messages=[\n        {'role': 'system', 'content': 'You are a helpful assistant.'},\n        {'role': 'user', 'content': 'What is fast inference?'}\n    ]\n)\nprint(response.choices[0].message.content)","lang":"python","description":"Minimal Cerebras inference call using cerebras-cloud-sdk 1.x."},"warnings":[{"fix":"pip install cerebras-cloud-sdk for cloud inference. cerebras-sdk is for hardware kernel development.","message":"cerebras-sdk on PyPI is a completely different package — it is Cerebras's hardware kernel development SDK for WSE systems. 
Do not confuse it with cerebras-cloud-sdk, the package for the cloud inference API.","severity":"breaking","affected_versions":"all"},{"fix":"client = Cerebras(api_key=..., warm_tcp_connection=False) — and reuse a single client instance rather than reconstructing.","message":"SDK sends TCP warming requests to /v1/tcp_warming on client construction to reduce time-to-first-token. This creates network traffic each time a client is constructed. Disable with warm_tcp_connection=False if you reconstruct the client frequently.","severity":"gotcha","affected_versions":"all"},{"fix":"Create a module-level singleton client. Do not instantiate Cerebras() inside request handlers or loops.","message":"Reconstructing the Cerebras client instance repeatedly causes poor performance due to repeated TCP warming. Construct once and reuse.","severity":"gotcha","affected_versions":"all"},{"fix":"Use Python 3.9 or higher.","message":"Requires Python 3.9+. Will fail to install on Python 3.8 with no clear error message.","severity":"gotcha","affected_versions":"all"},{"fix":"Use cerebras-cloud-sdk directly, not openai with base_url='https://api.cerebras.ai'.","message":"LLMs with no training data on Cerebras will hallucinate an OpenAI-style base_url override pattern. Cerebras has its own SDK — do not use openai with base_url for Cerebras.","severity":"gotcha","affected_versions":"all"},{"fix":"Initialize the Cerebras client with client = Cerebras(api_key='YOUR_API_KEY') or set the CEREBRAS_API_KEY environment variable before running the application.","message":"The Cerebras client requires an API key, which must be passed as an argument (api_key=...) or set via the CEREBRAS_API_KEY environment variable. 
Without it, the client cannot be initialized.","severity":"breaking","affected_versions":"all"},{"fix":"Set the CEREBRAS_API_KEY environment variable (e.g., `export CEREBRAS_API_KEY='your_api_key'`) or pass `api_key='your_api_key'` when instantiating the Cerebras client (e.g., `client = Cerebras(api_key='your_api_key')`).","message":"The Cerebras client requires an API key for authentication. This can be provided by setting the CEREBRAS_API_KEY environment variable or by passing the 'api_key' argument directly to the Cerebras client constructor.","severity":"breaking","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-12T08:43:55.426Z","next_check":"2026-06-24T00:00:00.000Z","problems":[{"fix":"Install the SDK using pip: `pip install cerebras-cloud-sdk`","cause":"The `cerebras-cloud-sdk` Python package is not installed in the current environment.","error":"ModuleNotFoundError: No module named 'cerebras.cloud.sdk'"},{"fix":"Ensure your `CEREBRAS_API_KEY` environment variable is set correctly, or pass `api_key='YOUR_API_KEY'` to the `Cerebras` client constructor.","cause":"The Cerebras API client was initialized without a valid API key, or the provided key is incorrect or expired.","error":"cerebras.cloud.sdk.AuthenticationError: Invalid API key"},{"fix":"Verify your network connectivity, check the configured `base_url` for the client, and ensure no firewalls are blocking the connection.","cause":"The Cerebras API client failed to establish a connection to the API endpoint, possibly due to network issues, a timeout, or an incorrect base URL.","error":"cerebras.cloud.sdk.APIConnectionError"},{"fix":"Review the API documentation to ensure the model name is correct and all request parameters and paths are valid. 
Inspect the error's `status_code` and `response` properties for more details.","cause":"The API request targeted a resource that does not exist, such as an incorrect model name or an invalid endpoint path.","error":"cerebras.cloud.sdk.NotFoundError: 404 Not Found"},{"fix":"Consult the `cerebras-cloud-sdk` documentation for the correct parameter names (e.g., `max_completion_tokens`) or pass non-standard parameters within the `extra_body` argument if applicable.","cause":"Developers often confuse Cerebras SDK's chat completion parameters with OpenAI's due to its OpenAI-compatible API surface, attempting to use parameters like `max_tokens` (OpenAI's) instead of the Cerebras SDK's specific parameter (e.g., `max_completion_tokens`).","error":"AttributeError: 'ChatCompletionCreateParams' object has no attribute 'max_tokens'"}],"ecosystem":"pypi","meta_description":null,"install_score":100,"install_tag":"verified","quickstart_score":80,"quickstart_tag":"verified","pypi_latest":null,"install_checks":{"last_tested":"2026-05-12","tag":"verified","tag_description":"installs cleanly on critical runtimes, fast import, recently tested","results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":0.7,"mem_mb":15.4,"disk_size":"34.1M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":0.54,"mem_mb":15.4,"disk_size":"34M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":1,"mem_mb":17.1,"disk_size":"37.1M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim 
(glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":0.83,"mem_mb":17.1,"disk_size":"37M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":1.13,"mem_mb":16.9,"disk_size":"28.6M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":1.11,"mem_mb":16.9,"disk_size":"28M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":1.02,"mem_mb":17.8,"disk_size":"28.2M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":1.01,"mem_mb":17.8,"disk_size":"28M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":0.65,"mem_mb":15.5,"disk_size":"33.1M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"default","exit_code":0,"wheel_type":null,"failure_reason":null,"install_time_s":null,"import_time_s":0.59,"mem_mb":15.5,"disk_size":"33M"}]},"quickstart_checks":{"last_tested":"2026-04-23","tag":"verified","tag_description":"quickstart runs on critical runtimes, recently 
tested","results":[{"runtime":"python:3.10-alpine","exit_code":0},{"runtime":"python:3.10-slim","exit_code":0},{"runtime":"python:3.11-alpine","exit_code":0},{"runtime":"python:3.11-slim","exit_code":0},{"runtime":"python:3.12-alpine","exit_code":0},{"runtime":"python:3.12-slim","exit_code":0},{"runtime":"python:3.13-alpine","exit_code":0},{"runtime":"python:3.13-slim","exit_code":0},{"runtime":"python:3.9-alpine","exit_code":0},{"runtime":"python:3.9-slim","exit_code":0}]}}