Databento Binary Encoding (DBN) Python Bindings

0.54.0 · active · verified Wed Apr 15

Python bindings for encoding and decoding Databento Binary Encoding (DBN). This library provides efficient Rust-backed functionality for working with DBN data streams and files, offering features like record buffering, mutable record references, and direct access to timestamp fields. As of version 0.54.0, it includes enhancements for dynamic record types and improved memory management. Releases occur frequently, typically on a monthly or bi-monthly basis.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to encode a DBN record from Python objects and then decode the resulting DBN bytes. It covers creating metadata, encoding an MBO record, and decoding from both raw bytes and a file-like object. It also shows how to compare timestamp fields such as `ts_out` against the `UNDEF_TIMESTAMP` sentinel to detect values that are not set.

import io
import datetime
from databento_dbn import DBNDecoder, DBNEncoder, Metadata, Schema, SType, Compression, MBO, UNDEF_TIMESTAMP

NANO_SECONDS_IN_SECOND = 1_000_000_000


def to_nanos(dt: datetime.datetime) -> int:
    """Convert a datetime to integer nanoseconds since the Unix epoch.

    The conversion uses integer arithmetic only. Going through
    ``dt.timestamp()`` produces a float, and floats cannot represent every
    nanosecond count at present-day epoch magnitudes, so sub-second inputs
    would come back off by up to a few hundred nanoseconds.

    Args:
        dt: The datetime to convert. A naive datetime is interpreted as
            local time, the same way ``datetime.timestamp()`` treats it.

    Returns:
        Nanoseconds since 1970-01-01T00:00:00Z. Resolution is bounded by the
        microsecond resolution of ``datetime``.
    """
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    # astimezone() assumes the system time zone for naive values before
    # converting, so naive and aware inputs both end up comparable to `epoch`.
    elapsed = dt.astimezone(datetime.timezone.utc) - epoch
    # timedelta normalizes to (days, 0 <= seconds, 0 <= microseconds), so this
    # sum is exact for instants before the epoch as well.
    whole_seconds = elapsed.days * 86_400 + elapsed.seconds
    return whole_seconds * NANO_SECONDS_IN_SECOND + elapsed.microseconds * 1_000

# 1. Create DBN metadata
# The metadata describes a one-second window of MBO data for one symbol.
# NOTE(review): the keyword arguments are kept as written; confirm each one
# (notably `compression`) against the `Metadata` constructor of the installed
# databento_dbn release -- TODO confirm.
metadata = Metadata(
    version=3,
    dataset="GLBX.MDP3",
    schema=Schema.MBO,
    # "ES.c.0" is continuous-contract symbology, so the input symbology type
    # has to say so rather than claim a raw exchange symbol.
    stype_in=SType.CONTINUOUS,
    stype_out=SType.INSTRUMENT_ID,
    start=to_nanos(datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)),
    end=to_nanos(datetime.datetime(2024, 1, 1, 0, 0, 1, tzinfo=datetime.timezone.utc)),
    symbols=["ES.c.0"],
    # `partial` and `not_found` list symbols (strings) that resolved only
    # partially or not at all; nothing failed to resolve in this example.
    partial=[],
    not_found=[],
    mappings=[],
    ts_out=False, # Set to False for this example to show UNDEF_TIMESTAMP
    compression=Compression.NONE,
)

# 2. Encode a sample MBO record into DBN bytes
# NOTE(review): `DBNEncoder`, `MBO` and the field names passed below are kept
# as written. Confirm them against the installed databento_dbn release; the
# published MBO schema looks like it carries an `order_id` field that is not
# set here -- TODO confirm.
encoder = DBNEncoder(metadata=metadata, upgrade_records=True)

# `datetime` resolves microseconds only (its microsecond argument must be
# below 1_000_000), so the nanosecond remainder is added as a plain integer
# instead of being passed to the datetime constructor.
sample_ts = (
    to_nanos(datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc))
    + 123_456_789
)

sample_mbo = MBO(
    publisher_id=1,
    instrument_id=12345,
    ts_event=sample_ts,
    action=b'A',
    side=b'B',
    price=100_000_000_000, # Represents 100.00 in fixed-point units of 1e-9
    size=10,
    depth=0,
    is_snapshot=1,
    ts_in_delta=0,
    sequence=1,
    booklevel=0,
    flags=0,
    display_qty=10,
    orders_count=1,
    ts_recv=sample_ts,
    trade_size=0,
    trade_id=0,
    mbp_flags=0,
    channel_id=0,
)
encoded_bytes = encoder.encode_record(sample_mbo)
encoded_bytes += encoder.finish() # Finalize the stream

print(f"Encoded {len(encoded_bytes)} bytes of DBN data.")

# 3. Decode the DBN data from bytes
# NOTE(review): `decode()` is handed the payload directly here; confirm that
# this matches the `DBNDecoder` signature of the installed release.
decoder = DBNDecoder()
# Collect everything the decoder produces for the in-memory payload.
decoded_records = list(decoder.decode(encoded_bytes))

print(f"\nDecoded {len(decoded_records)} records from raw bytes.")
for record in decoded_records:
    print(record)
    if not isinstance(record, MBO):
        # Only MBO records get the detailed summary below.
        continue
    print(f"  MBO Record: Instrument ID={record.instrument_id}, Price={record.price / NANO_SECONDS_IN_SECOND:.2f}, Size={record.size}")
    # A ts_out equal to the sentinel means no send timestamp was recorded.
    if record.ts_out == UNDEF_TIMESTAMP:
        print("  ts_out is undefined (as expected for this metadata configuration)")

# 4. Decode the DBN data from a file-like object
# The same payload is wrapped in an in-memory binary stream and handed to a
# fresh decoder instance.
# NOTE(review): confirm that `decode()` accepts a file-like object in the
# installed release.
file_decoder = DBNDecoder()
dbn_io = io.BytesIO(encoded_bytes)
file_records = list(file_decoder.decode(dbn_io))
print(f"\nDecoded {len(file_records)} records from BytesIO.")

view raw JSON →