{"library":"nvshmem4py-cu12","title":"nvshmem4py-cu12","description":"Python bindings for NVSHMEM (NVIDIA's implementation of OpenSHMEM for GPUs). Version 0.3.0 requires Python >=3.9 and CUDA 12.x. This package enables peer-to-peer GPU communication across NVLink and InfiniBand. Under active development with frequent breaking changes.","language":"python","status":"active","last_verified":"Mon Apr 27","install":{"commands":["pip install nvshmem4py-cu12"],"cli":null},"imports":["from nvshmem import init","from nvshmem import barrier","from nvshmem import my_pe_n","from nvshmem import n_pes"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import os\nimport cupy as cp\nimport nvshmem\nfrom nvshmem import init, barrier, my_pe_n, n_pes\n\n# Initialize NVSHMEM (must be called after MPI_Init or similar)\ninit()\n\nrank = my_pe_n()\nnranks = n_pes()\n\n# Allocate symmetric memory on GPU\nbuf = cp.empty(1024, dtype=cp.float32)\n\n# Barrier to synchronize\nbarrier()\n\nprint(f\"Rank {rank}/{nranks} ready.\", flush=True)\n\n# Example: send data from rank 0 to rank 1 (if nranks > 1)\nif nranks > 1:\n    if rank == 0:\n        buf[:] = 1.0\n        nvshmem.putmem(buf.data.ptr, 1, 0, 1024 * 4)  # put to rank 1\n    elif rank == 1:\n        nvshmem.getmem(buf.data.ptr, 0, 0, 1024 * 4)  # get from rank 0\n    barrier()\n\nprint(f\"Rank {rank} finished.\", flush=True)","lang":"python","description":"Initialize NVSHMEM, allocate symmetric GPU memory, perform put/get, and barrier.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}