{"library":"mlserver-mlflow","title":"MLServer MLflow Runtime","description":"mlserver-mlflow provides an MLflow runtime for MLServer, allowing users to serve models logged with MLflow using the MLServer inference server. It's currently at version 1.7.1 and maintains a release cadence aligned with MLServer's development, receiving updates for bug fixes and compatibility with new MLflow/MLServer versions.","language":"python","status":"active","last_verified":"Thu Apr 16","install":{"commands":["pip install mlserver-mlflow"],"cli":null},"imports":["from mlserver_mlflow import MLflowRuntime"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import os\nimport tempfile\nimport mlflow\nimport mlflow.sklearn\nfrom sklearn.linear_model import LogisticRegression\nimport numpy as np\nimport asyncio\nfrom mlserver_mlflow import MLflowRuntime\nfrom mlserver.settings import ModelSettings\nfrom mlserver.types import InferenceRequest, RequestInput\n\n# 1. Create a dummy MLflow model and log it locally\n#    (In a real scenario, this model would already be logged)\ntemp_dir = tempfile.TemporaryDirectory()\nmodel_base_path = os.path.join(temp_dir.name, \"mlflow_models\")\nmlflow.set_tracking_uri(f\"file://{model_base_path}/mlruns\")\nwith mlflow.start_run():\n    model = LogisticRegression()\n    model.fit(np.array([[0,0],[1,1]]), np.array([0,1]))\n    mlflow.sklearn.log_model(model, \"model_artifact\")\n    model_uri = f\"file://{mlflow.active_run().info.artifact_uri}/model_artifact\"\n\n# 2. Instantiate and load MLflowRuntime\nasync def main():\n    model_settings = ModelSettings(\n        name=\"my-mlflow-model\",\n        implementation=\"mlserver_mlflow.MLflowRuntime\",\n        parameters={\n            \"uri\": model_uri\n        }\n    )\n    mlflow_runtime = MLflowRuntime(model_settings)\n    await mlflow_runtime.load()\n\n    # 3. Prepare and send inference request\n    request_input = RequestInput(\n        name=\"predict\",\n        shape=[1, 2],\n        datatype=\"FP32\",\n        data=[[0.5, 0.5]]\n    )\n    inference_request = InferenceRequest(inputs=[request_input])\n\n    response = await mlflow_runtime.predict(inference_request)\n    print(\"Prediction:\", response.outputs[0].data)\n\n    await mlflow_runtime.unload()\n    temp_dir.cleanup() # Clean up temporary model files\n\nasyncio.run(main())\n","lang":"python","description":"This quickstart demonstrates how to programmatically use `MLflowRuntime` to load an MLflow model and perform an inference. It first creates a dummy MLflow model and logs it locally, then uses its URI to instantiate `MLflowRuntime` within MLServer's `ModelSettings`, loads the model, and makes a prediction. The `asyncio.run(main())` block executes the asynchronous model loading and inference.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}