{"id":20968,"library":"autoawq","title":"AutoAWQ","description":"AutoAWQ implements the AWQ (Activation-aware Weight Quantization) algorithm for 4-bit quantization of large language models, achieving up to 2x speedup during inference. The library is now deprecated as of v0.2.9 (April 2025), with vLLM having adopted the technology. Last tested with Torch 2.6.0 and Transformers 4.51.3.","status":"deprecated","version":"0.2.9","language":"python","source_language":"en","source_url":"https://github.com/casper-hansen/AutoAWQ","tags":["quantization","awq","4-bit","llm","inference","deprecated"],"install":[{"cmd":"pip install autoawq","lang":"bash","label":"pip install autoawq"},{"cmd":"pip install autoawq[extras]","lang":"bash","label":"Install with extras (e.g., evaluate, multimodal)"}],"dependencies":[{"reason":"Core dependency for tensor computation and GPU support; requires >=2.2.0.","package":"torch","optional":false},{"reason":"Required for model loading and tokenization; compatibility may break with versions >4.51.3.","package":"transformers","optional":false}],"imports":[{"note":"","wrong":"","symbol":"AutoAWQForCausalLM","correct":"from awq import AutoAWQForCausalLM"},{"note":"AutoAWQConfig is directly importable from awq, not from awq.utils.","wrong":"from awq.utils import AutoAWQConfig","symbol":"AutoAWQConfig","correct":"from awq import AutoAWQConfig"}],"quickstart":{"code":"from awq import AutoAWQForCausalLM, AutoAWQConfig\nfrom transformers import AutoTokenizer\n\nmodel_path = 'casperhansen/mixtral-instruct-awq'\nquant_config = AutoAWQConfig(bits=4, group_size=128, zero_point=True)\nmodel = AutoAWQForCausalLM.from_pretrained(model_path, config=quant_config, device_map='auto')\ntokenizer = AutoTokenizer.from_pretrained(model_path)\ninputs = tokenizer(\"Hello, how are you?\", return_tensors='pt')\noutputs = model.generate(**inputs, max_new_tokens=100)\nprint(tokenizer.decode(outputs[0]))","lang":"python","description":"Load a pre-quantized AWQ model and generate text."},"warnings":[{"fix":"Migrate to vLLM (pip install vllm) and use vLLM's built-in AWQ support.","message":"AutoAWQ is officially deprecated as of v0.2.9. No further updates or bug fixes will be provided. Users are advised to migrate to vLLM, which has adopted AWQ natively.","severity":"breaking","affected_versions":">=0.2.9"},{"fix":"Use 'from awq import AutoAWQForCausalLM' (note the lowercase 'awq').","message":"Import path confusion: Some online examples show 'from auto_gptq import ...' but AutoAWQ is a separate library. Do not confuse with GPTQ (auto_gptq).","severity":"gotcha","affected_versions":"all"},{"fix":"Pin transformers to <=4.51.3, or upgrade to vLLM which tracks latest transformers versions.","message":"Transformers compatibility is fragile. AutoAWQ v0.2.9 was last tested with Transformers 4.51.3. 
Using newer versions may cause silent inference errors or import failures.","severity":"gotcha","affected_versions":">=0.2.9"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"Run: pip install autoawq --upgrade and use: from awq import AutoAWQForCausalLM","cause":"Typo: the import uses 'AutoAWQForCausalLM' but the correct symbol may be case-sensitive; also check that the library is installed with correct version.","error":"ImportError: cannot import name 'AutoAWQForCausalLM' from 'awq'"},{"fix":"Install the package: pip install autoawq, then use: from awq import ...","cause":"AutoAWQ is not installed, or installed but the module name is 'autoawq' (some users mistakenly import 'autoawq' instead of 'awq').","error":"ModuleNotFoundError: No module named 'awq'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}
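Supplementary sketch of the quantization workflow this record describes (the quickstart above only loads an already-quantized checkpoint). It assumes AutoAWQ's model.quantize() / save_quantized() API; the base model name and output path are hypothetical placeholders to adapt before use.

# Minimal AWQ quantization sketch, assuming AutoAWQ's quantize/save_quantized API.
# 'mistralai/Mistral-7B-Instruct-v0.2' and 'mistral-instruct-v0.2-awq' are placeholder paths.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = 'mistralai/Mistral-7B-Instruct-v0.2'  # base (unquantized) model
quant_path = 'mistral-instruct-v0.2-awq'           # where the 4-bit weights are written
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

# Load the FP16 model and tokenizer, run AWQ calibration, then save the quantized model.
model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model.quantize(tokenizer, quant_config=quant_config)
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)

The saved directory can then be loaded with AutoAWQForCausalLM.from_quantized() as in the quickstart, or served with vLLM per the deprecation warning.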