Databricks AI Bridge

0.18.0 · active · verified Sat Apr 11

Official Python library for Databricks AI support, simplifying Retrieval Augmented Generation (RAG) applications within the Databricks ecosystem. It provides tools for document processing, vectorization, and model serving directly integrated with Databricks services. Currently at version 0.18.0, it follows a pre-1.0 release cadence with frequent updates.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates initializing the Databricks Workspace Client, then setting up DatabricksVectorSearch, DatabricksLLM, and PromptEngineer components. It requires a configured Databricks environment, including environment variables `DATABRICKS_HOST` and `DATABRICKS_TOKEN`, and pre-existing Databricks Vector Search indexes and model serving endpoints for full functionality.

import os
from databricks.sdk import WorkspaceClient
from databricks_ai_bridge.rag.vectorstores import DatabricksVectorSearch
from databricks_ai_bridge.model_serving.prompt_engineering import PromptEngineer
from databricks_ai_bridge.model_serving.llm import DatabricksLLM

# Initialize the Databricks Workspace Client.
# DATABRICKS_HOST and DATABRICKS_TOKEN should be set in the environment (or
# passed directly to WorkspaceClient); the dummy fallbacks below keep the
# script runnable for demonstration even without a configured workspace.
databricks_host = os.environ.get("DATABRICKS_HOST", "https://dummy-host.cloud.databricks.com")
databricks_token = os.environ.get("DATABRICKS_TOKEN", "dapi-dummy-token")

workspace = None
try:
    # In a real environment, replace the dummy values or export the env vars.
    workspace = WorkspaceClient(host=databricks_host, token=databricks_token)
    # Connectivity could be verified here with workspace.current_user.me()
    print("Databricks Workspace Client initialized.")
except Exception as e:
    print(f"Warning: Could not initialize Databricks WorkspaceClient. Check DATABRICKS_HOST/TOKEN: {e}")
    print("Quickstart will proceed with dummy client, but actual interactions will fail.")

if not workspace:
    print("Skipping Databricks AI Bridge component initialization due to WorkspaceClient failure.")
else:
    # Vector Search coordinates for the RAG example -- substitute your own
    # catalog, schema, table, and index names.
    vs_catalog = "main"
    vs_schema = "default"
    vs_table = "my_documents_table"
    vs_index = "my_vector_search_index"

    try:
        store = DatabricksVectorSearch(
            workspace,
            catalog_name=vs_catalog,
            schema_name=vs_schema,
            table_name=vs_table,
            index_name=vs_index,
        )
        print(f"Initialized DatabricksVectorSearch for index: {vs_index}")

        # An LLM served from the workspace -- a Foundation Model endpoint or a
        # custom served model; swap in the endpoint name you actually serve.
        endpoint = "databricks-mixtral-8x7b-instruct"
        model = DatabricksLLM(workspace_client=workspace, serving_endpoint=endpoint)

        prompt_engineer = PromptEngineer(llm=model, vector_store=store)

        question = "What is Databricks Lakehouse Platform?"
        print(f"PromptEngineer initialized. A live call to engineer.answer_question('{question}') would retrieve information using the configured LLM and vector store on Databricks.")
        # To run live:
        # response = prompt_engineer.answer_question(question)
        # print(f"Query: {question}\nResponse: {response}")

    except Exception as e:
        print(f"Error during quickstart setup. This often means required Databricks resources (e.g., Vector Search index, LLM endpoint) are not configured or accessible: {e}")
        print("Please ensure your Databricks environment is correctly set up as per databricks-ai-bridge documentation.")

view raw JSON →