{"library":"auto-gptq","type":"library","category":null,"description":"AutoGPTQ is an easy-to-use LLMs quantization package based on the GPTQ algorithm. It provides user-friendly APIs for quantizing and running large language models with reduced memory usage. Current version 0.7.1 supports loading sharded quantized checkpoints and Gemma models. Release cadence is irregular, with major features in point releases.","language":"python","status":"active","version":"0.7.1","tags":["llm","quantization","gptq","cuda","exllama","inference"],"last_verified":"Fri May 01","install":[{"cmd":"pip install auto-gptq","imports":["from auto_gptq import AutoGPTQForCausalLM","from auto_gptq import BaseQuantizeConfig","from auto_gptq import exllama_set_max_input_length"]},{"cmd":"pip install auto-gptq[exllama]","imports":[]}],"homepage":null,"github":"https://github.com/PanQiWei/AutoGPTQ","docs":null,"changelog":null,"pypi":"https://pypi.org/project/auto-gptq/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":null}