MongoDB integration for LangChain

0.11.0 · active · verified Fri Apr 10

This package provides integrations for MongoDB products within the LangChain ecosystem, including VectorStore, DocumentLoader, and ChatMessageHistory capabilities. It is currently at version 0.11.0 and is actively maintained, receiving frequent updates to align with LangChain's evolving architecture and MongoDB's features.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to use `MongoDBAtlasVectorSearch` to store and query documents. It connects to a MongoDB cluster (falling back to `mongodb://localhost:27017/` when `MONGODB_ATLAS_CLUSTER_URI` is not set), initializes a vector store with a placeholder embedding model, adds three documents, and performs a similarity search. Before running it, create a vector search index named `default` on the target collection in MongoDB Atlas. `DummyEmbeddings` returns the same 1536-dimensional vector for every input, so the search result is not meaningful; replace it with a real embedding model (e.g., `OpenAIEmbeddings`) for production use.

import os
from pymongo import MongoClient
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch

# NOTE: Replace DummyEmbeddings with a real embedding model (e.g., OpenAIEmbeddings)
# For a runnable example without extra API keys, we use a placeholder.
class DummyEmbeddings:
    """Placeholder embedding model that returns constant vectors.

    Every text is mapped to the same vector (``0.1`` repeated ``dimensions``
    times), so similarity scores carry no meaning. It exists only so the
    example can run without an embedding provider or API key.

    Args:
        dimensions: Length of each returned vector. Defaults to 1536.

    Raises:
        ValueError: If ``dimensions`` is smaller than 1.
    """

    def __init__(self, dimensions: int = 1536) -> None:
        if dimensions < 1:
            raise ValueError("dimensions must be a positive integer")
        self.dimensions = dimensions

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Return one constant vector per entry in ``texts``."""
        # Build a fresh list per text so callers never share a vector object.
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> list[float]:
        """Return the constant vector for a single query (``text`` is ignored)."""
        return [0.1] * self.dimensions

# MongoDB connection settings. Each value is read from the environment and
# falls back to the default shown here when the variable is not set.
MONGODB_ATLAS_CLUSTER_URI = os.getenv("MONGODB_ATLAS_CLUSTER_URI", "mongodb://localhost:27017/")
MONGODB_DATABASE = os.getenv("MONGODB_DATABASE", "langchain_db")
MONGODB_COLLECTION = os.getenv("MONGODB_COLLECTION", "vector_collection")

# Initialize MongoDB client and collection.
# The client is used as a context manager so it is closed when the script
# finishes, even if one of the steps below raises.
with MongoClient(MONGODB_ATLAS_CLUSTER_URI) as client:
    collection = client[MONGODB_DATABASE][MONGODB_COLLECTION]

    # Initialize embedding model (replace DummyEmbeddings with e.g., OpenAIEmbeddings)
    # embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))
    embeddings = DummyEmbeddings()

    # Initialize MongoDB Atlas Vector Search.
    # Ensure the 'default' index exists in MongoDB Atlas on the specified
    # collection before searching.
    vector_search = MongoDBAtlasVectorSearch(
        collection=collection,
        embedding=embeddings,
        index_name="default",  # The name of your Atlas Search Vector Index
    )

    # Add documents to the vector store
    docs = [
        "The quick brown fox jumps over the lazy dog.",
        "A group of cats is called a clowder.",
        "Python is a high-level, interpreted programming language.",
    ]
    vector_search.add_texts(docs)
    print(f"Added {len(docs)} documents to MongoDB Atlas Vector Search.")

    # Perform a similarity search.
    # NOTE: DummyEmbeddings maps every text to the same vector, so which
    # document comes back is arbitrary until a real embedding model is used.
    # NOTE(review): freshly inserted documents may not be searchable until the
    # Atlas index has caught up - confirm against the Atlas documentation.
    query = "animals running"
    results = vector_search.similarity_search(query, k=1)
    print(f"Similarity search results for '{query}':")
    for res in results:
        print(f"- {res.page_content}")

    # Clean up (optional) - remove added documents
    # collection.delete_many({"text": {"$in": docs}})
    # print("Cleaned up documents.")

view raw JSON →