{"library":"lmdeploy","title":"LMDeploy","description":"LMDeploy is a toolkit for compressing, deploying, and serving large language models (LLMs). It supports efficient inference with quantization, continuous batching, and two inference engines (TurboMind and PyTorch). The current version is 0.12.3; releases are frequent, tracking dependency updates and new model support.","language":"python","status":"active","last_verified":"Fri May 01","install":{"commands":["pip install lmdeploy"],"cli":{"name":"lmdeploy","version":""}},"imports":["from lmdeploy import pipeline","from lmdeploy import TurbomindEngineConfig"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"from lmdeploy import pipeline\nfrom lmdeploy import TurbomindEngineConfig\n\nengine_config = TurbomindEngineConfig(model_format='hf', tp=1)\npipe = pipeline('internlm/internlm2_5-1_8b', engine_config=engine_config)\nresponse = pipe('Hello, how are you?')\nprint(response.text)","lang":"python","description":"Initialize a pipeline with a Hugging Face model and engine config, then generate a response.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}