PyMongo Schema

0.4.2 · active · verified Thu Apr 16

PyMongo Schema is a Python library that analyzes MongoDB collections and databases and infers their underlying schema structure. It helps users understand the shapes of the documents stored in their MongoDB instances. As of version 0.4.2, it provides tools for generating schema definitions but does not enforce them. Releases are infrequent, which suggests a stable API but also means that fixes and new features arrive slowly.

Common errors

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to connect to a MongoDB instance, insert sample data into a temporary collection, and then use `pymongo_schema.Schema` to infer the schema of a single collection and `pymongo_schema.db.DBSchema` to infer the schema of an entire database. It includes basic error handling for MongoDB connection issues and cleans up the temporary database.

import os
import pymongo
from pymongo_schema import Schema
from pymongo_schema.db import DBSchema

# Quickstart: infer the schema of one collection and of a whole database.
#
# Ensure MongoDB is running on localhost:27017, or set MONGO_URI to point at
# another instance.
# For authentication, use os.environ.get('MONGO_USER') etc.
MONGO_URI = os.environ.get('MONGO_URI', 'mongodb://localhost:27017/')
DB_NAME = 'pymongo_schema_test_db'
COLLECTION_NAME = 'my_test_collection'

# Bind the name up front so the cleanup code can test it directly instead of
# probing locals() for its existence.
client = None

try:
    # MongoClient connects lazily: an unreachable server is normally reported
    # by the first real operation (insert_many below), not by this constructor.
    client = pymongo.MongoClient(MONGO_URI)
    db = client[DB_NAME]
    collection = db[COLLECTION_NAME]

    # Insert some dummy data for schema inference. The documents deliberately
    # have different sets of fields so the inferred schema is not trivial.
    collection.insert_many([
        {"name": "Alice", "age": 30, "city": "New York"},
        {"name": "Bob", "age": 25, "hobbies": ["reading", "coding"]},
        {"name": "Charlie", "age": 35, "city": "London", "is_active": True},
        {"name": "David", "country": "Canada", "age": 40},
    ])

    print(f"--- Schema for collection '{COLLECTION_NAME}' ---")
    # NOTE(review): confirm that Schema(...).create_schema() exists in the
    # installed pymongo-schema release before relying on this example.
    collection_schema = Schema(collection)
    schema_result = collection_schema.create_schema()
    # print(schema_result) # Uncomment to see full schema
    # Assumes the result is a dict keyed by field name whose values are dicts
    # carrying a 'type' entry -- TODO confirm against the library output.
    print(f"Keys in collection schema: {list(schema_result.keys())}")
    print(f"Name type: {schema_result.get('name', {}).get('type')}")

    print(f"\n--- Schema for database '{DB_NAME}' ---")
    db_schema = DBSchema(db)
    db_schema_result = db_schema.create_schema()
    # print(db_schema_result) # Uncomment to see full DB schema
    print(f"Collections in DB schema: {list(db_schema_result.keys())}")

except pymongo.errors.ConnectionFailure as e:
    print(f"Error: Could not connect to MongoDB at {MONGO_URI}. Please ensure MongoDB is running. Details: {e}")
except Exception as e:
    # Top-level boundary of a demo script: report the problem and still run
    # the cleanup below rather than aborting with a traceback.
    print(f"An unexpected error occurred: {e}")
finally:
    # Clean up the test database
    if client is not None:
        try:
            # The cleanup talks to the server too, so it fails in the same way
            # as the main body when MongoDB is unreachable. Guard it so that
            # the error already reported above is not followed by a second,
            # uncaught exception raised from inside `finally`.
            if DB_NAME in client.list_database_names():
                client.drop_database(DB_NAME)
                print(f"\nCleaned up database '{DB_NAME}'.")
        except pymongo.errors.PyMongoError as e:
            print(f"Warning: could not clean up database '{DB_NAME}': {e}")
        finally:
            # Always release the client, even if the cleanup itself failed.
            client.close()

view raw JSON →