{"id":27848,"library":"gram-newton-schulz","title":"Gram-Newton-Schulz","description":"A fast implementation of the Newton-Schulz algorithm for computing matrix square roots and Gram matrix inverses, with support for JIT-compiled kernels via Quack. Current version 0.1.4, requires Python >=3.10. Published by the Dao-AILab organization, with occasional updates.","status":"active","version":"0.1.4","language":"python","source_language":"en","source_url":"https://github.com/Dao-AILab/gram-newton-schulz","tags":["matrix-square-root","newton-schulz","jit","quack","pytorch"],"install":[{"cmd":"pip install gram-newton-schulz","lang":"bash","label":"Standard install"}],"dependencies":[{"reason":"Core tensor operations and compilation","package":"torch","optional":false},{"reason":"JIT kernel backend (>=0.3.7)","package":"quack","optional":false}],"imports":[{"note":"Class is inside the package, not a top-level module","wrong":"import GramNewtonSchulz","symbol":"GramNewtonSchulz","correct":"from gram_newton_schulz import GramNewtonSchulz"},{"note":"","wrong":null,"symbol":"NewtonSchulzBase","correct":"from gram_newton_schulz import NewtonSchulzBase"}],"quickstart":{"code":"import torch\nfrom gram_newton_schulz import GramNewtonSchulz\n\n# Create a symmetric positive-definite matrix\nA = torch.randn(4, 4, device='cuda' if torch.cuda.is_available() else 'cpu')\nA = A @ A.T\n\n# Initialize the solver with default settings\nsolver = GramNewtonSchulz()\n\n# Compute the matrix square root (X such that X @ X ≈ A)\nX = solver.sqrt(A)\nprint(X)","lang":"python","description":"Compute a matrix square root using the Gram-Newton-Schulz algorithm."},"warnings":[{"fix":"Ensure your matrix is symmetric and has positive eigenvalues. Consider adding a small regularization term like `A + 1e-6 * torch.eye(A.shape[0])`.","message":"Input matrix must be symmetric positive-definite; non-SPD matrices may cause convergence failure or incorrect results.","severity":"gotcha","affected_versions":">=0.1.0"},{"fix":"Enable `torch.compile` by wrapping the solver method, e.g. `torch.compile(solver.sqrt)`, or by using the `compile_kwargs` argument.","message":"`torch.compile` is required for performance; without it, the fallback implementation may be very slow or unsupported.","severity":"gotcha","affected_versions":">=0.1.0"},{"fix":"Replace `from gram_newton_schulz import StandardNewtonSchulz` with `from gram_newton_schulz import GramNewtonSchulz`.","message":"The `StandardNewtonSchulz` class was merged into `GramNewtonSchulz` in v0.1.0. Old imports will break.","severity":"deprecated","affected_versions":"<0.1.0"}],"env_vars":null,"last_verified":"2026-05-09T00:00:00.000Z","next_check":"2026-08-07T00:00:00.000Z","problems":[{"fix":"Run `pip install gram-newton-schulz`. The correct import string is `from gram_newton_schulz import ...`.","cause":"Library not installed, or installed with wrong name.","error":"ModuleNotFoundError: No module named 'gram_newton_schulz'"},{"fix":"Ensure both the solver (if it holds state) and input tensors are on the same device. Use `solver = GramNewtonSchulz().to(device)`.","cause":"Solver is initialized on CPU while input tensor is on GPU, or vice versa.","error":"RuntimeError: Expected all tensors to be on the same device, but found at least two devices"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}