{"library":"autoawq","type":"library","category":null,"description":"AutoAWQ implements the AWQ (Activation-aware Weight Quantization) algorithm for 4-bit quantization of large language models, achieving up to 2x speedup during inference. The library has been deprecated since v0.2.9 (April 2025); vLLM has since adopted the technology. Last tested with Torch 2.6.0 and Transformers 4.51.3.","language":"python","status":"deprecated","version":"0.2.9","tags":["quantization","awq","4-bit","llm","inference","deprecated"],"last_verified":"Mon Apr 27","install":[{"cmd":"pip install autoawq","imports":["from awq import AutoAWQForCausalLM","from awq import AutoAWQConfig"]},{"cmd":"pip install autoawq[extras]","imports":[]}],"homepage":null,"github":"https://github.com/casper-hansen/AutoAWQ","docs":null,"changelog":null,"pypi":"https://pypi.org/project/autoawq/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":null}