OpenSearch DSL

2.1.0 · deprecated · verified Tue Apr 14

The `opensearch-dsl` library provides a high-level, declarative Python client for OpenSearch, enabling users to work with OpenSearch entities like documents and search queries as Python objects. It simplifies query construction and common OpenSearch operations, building on top of the lower-level `opensearch-py` client. The current version is 2.1.0. It has been announced that this library will be deprecated after version 2.1.0, with its functionality merged into `opensearch-py`.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to connect to an OpenSearch cluster, define a document schema by subclassing `opensearch_dsl.Document`, create an index, index a document, and run a search query. Connection details are read from environment variables, which is the recommended approach for production settings.

import os
from opensearchpy import OpenSearch
from opensearch_dsl import Document, Text, Keyword, Search

# Connection settings come from the environment so that credentials and
# endpoints are not hard-coded; each falls back to a local-development default.
OPENSEARCH_HOST = os.getenv('OPENSEARCH_HOST', 'localhost')
OPENSEARCH_PORT = int(os.getenv('OPENSEARCH_PORT', '9200'))
OPENSEARCH_USER = os.getenv('OPENSEARCH_USER', 'admin')
OPENSEARCH_PASSWORD = os.getenv('OPENSEARCH_PASSWORD', 'admin')
# Optional path to a CA bundle, e.g. '/full/path/to/root-ca.pem'; None when unset.
OPENSEARCH_CA_CERTS = os.getenv('OPENSEARCH_CA_CERTS')

# Create the OpenSearch client from the connection settings defined above.
client = OpenSearch(
    # Fixed: the port variable was misspelled (OPENSEACH_PORT), which raised
    # NameError before any connection was attempted.
    hosts=[{'host': OPENSEARCH_HOST, 'port': OPENSEARCH_PORT}],
    http_compress=True,  # enables gzip compression for request bodies
    http_auth=(OPENSEARCH_USER, OPENSEARCH_PASSWORD),
    use_ssl=OPENSEARCH_HOST != 'localhost',  # use SSL for non-localhost hosts
    verify_certs=True,
    ssl_assert_hostname=False,  # disable hostname verification for testing/local setups
    ssl_show_warn=False,
    ca_certs=OPENSEARCH_CA_CERTS,
)

# Name of the index used by every step of this quickstart.
index_name = 'my-dsl-index'

# 1. Define a Document class
class MyDocument(Document):
    """Schema for the documents stored in the quickstart index.

    Declares three fields: ``title`` (Text, with a ``raw`` Keyword sub-field),
    ``description`` (Text) and ``category`` (Keyword).
    """

    # The 'raw' sub-field keeps a Keyword variant of the title next to the Text one.
    title = Text(fields={'raw': Keyword()})
    description = Text()
    category = Keyword()

    class Index:
        # Index this document class is bound to, plus the settings used at creation.
        name = index_name
        # One shard and no replicas: a minimal layout (presumably intended for a
        # single-node/local cluster — adjust for production).
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0
        }

# 2. Create the index from the document's mapping/settings unless it already exists
index_already_present = client.indices.exists(index=index_name)
if not index_already_present:
    index_definition = MyDocument._index.to_dict()
    response = client.indices.create(index=index_name, body=index_definition)
    print('Creating index:', response)

# 3. Build one document with an explicit id and store it through the client
doc = MyDocument(
    meta={'id': '1'},
    title='Python Basics',
    description='A guide to Python programming',
    category='programming',
)
doc.save(using=client)
print('Indexed document:', doc.to_dict())

# 4. Refresh so the document just written is visible to the search below
client.indices.refresh(index=index_name)

# 5. Search: exact-term filter on the category, then a match query on the title
s = Search(using=client, index=index_name)
s = s.filter('term', category='programming')
s = s.query('match', title='python')
response = s.execute()

print('\nSearch results:')
for hit in response:
    score, title, category = hit.meta.score, hit.title, hit.category
    print(f"Score: {score}, Title: {title}, Category: {category}")

# 6. Optional cleanup: uncomment to delete the document and then the index
# client.delete(index=index_name, id='1', refresh=True)
# print('\nDeleted document with id 1')
# client.indices.delete(index=index_name)
# print('Deleted index:', index_name)

view raw JSON →