PyMongo Search Utilities

0.3.0 · active · verified Sat Apr 11

PyMongo Search Utils is a Python library designed to simplify working with vector search in MongoDB Atlas. It provides utilities for generating embeddings, constructing Atlas Search queries, and executing them via PyMongo. Currently at version 0.3.0, it's under active development with releases occurring as features and fixes are introduced, typically every few weeks.

Warnings

Install

Imports

Quickstart

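This quickstart demonstrates how to connect to MongoDB Atlas, initialize `AtlasSearch` with an `OpenAIEmbeddings` instance as its embedding function, and perform both a vector search and a text search. The script reads `CONNECTION_STRING` and `OPENAI_API_KEY` from the `MONGO_URI` and `OPENAI_API_KEY` environment variables and falls back to placeholder values, so set those variables (or edit the defaults) before running. Also replace the database and collection names and the `index_name`, `vector_search_field`, and `text_search_field` arguments with values that match your deployment.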

import os
import pymongo
from pymongo_search_utils import AtlasSearch
from pymongo_search_utils.embeddings import OpenAIEmbeddings

# Quickstart: seed a collection if it is empty, then run one vector search and
# one text search through AtlasSearch.
#
# Both settings below are read from the environment; the second argument to
# each os.environ.get() call is only a placeholder fallback.

# Replace with your MongoDB Atlas connection string
CONNECTION_STRING = os.environ.get("MONGO_URI", "mongodb://localhost:27017/")

# Replace with your OpenAI API key
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "sk-YOUR_OPENAI_API_KEY")

# Initialize OpenAI Embeddings
embedding_service = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Connect to MongoDB Atlas. The client is used as a context manager so it is
# closed even when one of the embedding, insert, or search calls below raises.
with pymongo.MongoClient(CONNECTION_STRING) as client:
    db = client["mydatabase"]
    collection = db["mycollection"]

    # Initialize AtlasSearch
    atlas_search = AtlasSearch(
        collection=collection,
        index_name="default",  # Your Atlas Search index name
        embedding_function=embedding_service,
        vector_search_field="plot_embedding",  # The field in your collection containing vector embeddings
        text_search_field="plot",  # The field in your collection for text search
    )

    # Example: Insert dummy data (if collection is empty)
    if collection.count_documents({}) == 0:
        print("Inserting dummy data...")
        sample_plots = [
            "A dog goes on an adventure.",
            "Two friends discover a magical portal.",
        ]
        # Build every document before writing anything: if an embedding call
        # fails, the collection stays empty and the next run seeds it again
        # instead of being left with only part of the sample data.
        sample_docs = [
            {"plot": plot, "plot_embedding": embedding_service.embed_query(plot)}
            for plot in sample_plots
        ]
        collection.insert_many(sample_docs)
        print("Dummy data inserted.")

    # Perform a vector search
    query = "a furry companion's journey"
    results_vector = atlas_search.vector_search(query_string=query, limit=1)

    print(f"\nVector Search Results for '{query}':")
    for doc in results_vector:
        print(f"  - Plot: {doc.get('plot')}")

    # Perform a text search
    query_text = "magical portal"
    results_text = atlas_search.text_search(query_string=query_text, limit=1)

    print(f"\nText Search Results for '{query_text}':")
    for doc in results_text:
        print(f"  - Plot: {doc.get('plot')}")

view raw JSON →