Azure AI Document Intelligence

1.0.2 · active · verified Thu Apr 09

Microsoft Azure AI Document Intelligence Client Library for Python. This library provides access to Azure AI Document Intelligence (formerly Form Recognizer) services for processing documents and extracting data. It follows the Azure SDK guidelines for Python, offering features like layout analysis, prebuilt models for common document types, custom model building, and document classification.

Warnings

Install

Imports

Quickstart

Demonstrates how to initialize a `DocumentIntelligenceClient` with an endpoint and API key, and then use it to analyze a document from a URL using a prebuilt model. It iterates through the extracted fields and prints their content and confidence.

import os

from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
from azure.core.credentials import AzureKeyCredential

def format_field_line(name, field):
    """Return the printable summary line for one extracted field.

    Args:
        name: The field's key in ``document.fields``.
        field: An object exposing ``content`` and ``confidence`` attributes.

    Returns:
        The indented ``name: content (Confidence: x.xx)`` line. When the
        field carries no confidence score, ``n/a`` is shown instead of
        failing on the float format spec.
    """
    confidence = field.confidence
    shown = "n/a" if confidence is None else f"{confidence:.2f}"
    return f"    {name}: {field.content} (Confidence: {shown})"


# Set your Document Intelligence endpoint and key as environment variables
# e.g., AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT and AZURE_DOCUMENT_INTELLIGENCE_KEY
endpoint = os.environ.get("AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT", "<your-endpoint>")
key = os.environ.get("AZURE_DOCUMENT_INTELLIGENCE_KEY", "<your-key>")

if endpoint == "<your-endpoint>" or key == "<your-key>":
    print("Please set the AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT and AZURE_DOCUMENT_INTELLIGENCE_KEY environment variables.")
    print("You can find these in your Azure portal under your Document Intelligence resource's 'Keys and Endpoint' section.")
else:
    document_url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_forms/forms/Invoice_1.pdf"

    # Use the client as a context manager so its underlying HTTP transport
    # is closed once the analysis has finished.
    with DocumentIntelligenceClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    ) as document_intelligence_client:
        print(f"Analyzing document from URL: {document_url}")
        # Use 'prebuilt-invoice' for invoices, 'prebuilt-receipt' for receipts, etc.
        # Or use your custom model_id for custom models.
        # DocumentIntelligenceClient has no `begin_analyze_document_from_url`
        # (that method belongs to the older azure-ai-formrecognizer client);
        # a URL source is passed via AnalyzeDocumentRequest instead.
        poller = document_intelligence_client.begin_analyze_document(
            "prebuilt-invoice", AnalyzeDocumentRequest(url_source=document_url)
        )
        # Block until the long-running analysis operation completes.
        result = poller.result()

    if result.documents:
        for idx, document in enumerate(result.documents):
            print(f"\n--- Document {idx + 1} Analysis ---")
            if document.doc_type:
                print(f"  Document type: {document.doc_type}")
            if document.fields:
                print("  Extracted Fields:")
                for name, field in document.fields.items():
                    # Fields without textual content are skipped.
                    if field.content:
                        print(format_field_line(name, field))
    else:
        print("No documents found in the analysis result.")

view raw JSON →