django-chunkator

2.0.0 · active · verified Thu Apr 16

django-chunkator is a Python library for Django that allows iterating over large QuerySets in small, memory-efficient chunks, preventing high RAM consumption. The current version is 2.0.0, and the project actively maintains compatibility with recent Django and Python versions.

Common errors

Warnings

Install

Imports

Quickstart

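Demonstrates how to use `chunkator` to iterate over a large Django QuerySet in manageable chunks. The example below configures Django in-process and substitutes a hand-written `MockQuerySet` for a real database-backed QuerySet; in a real project you would pass something like `LargeModel.objects.all()` instead. Note that for `values()` queries, the primary key field (e.g., 'pk' or 'id') must be explicitly included to avoid `MissingPkFieldException`.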

import os
import django
from django.conf import settings
from django.db import models

# Bootstrap Django inside this script so the example does not need a project.
# Configuration is skipped when settings have already been configured.
if not settings.configured:
    settings.configure(**{
        'DEBUG': True,
        'INSTALLED_APPS': [
            'django.contrib.auth',
            'django.contrib.contenttypes',
            'your_app_name',  # Placeholder
        ],
        # Throwaway in-memory SQLite database.
        'DATABASES': {
            'default': {
                'ENGINE': 'django.db.backends.sqlite3',
                'NAME': ':memory:',
            },
        },
    })
# Runs unconditionally, whether or not the settings were configured above.
django.setup()

# Demo model used to build the sample objects that are iterated further down.
class LargeModel(models.Model):
    # Short text label; also what __str__ returns.
    name = models.CharField(max_length=255)
    # Integer payload for the sample rows.
    value = models.IntegerField()

    class Meta:
        # Explicitly attach the model to the placeholder app label.
        app_label = 'your_app_name'

    def __str__(self):
        # Represent an instance by its `name` field.
        return self.name

# Check whether the placeholder app is known to Django's app registry and,
# if it is not, define a config class for it.
from django.apps import AppConfig, apps

try:
    apps.get_app_config('your_app_name')
except LookupError:
    # The app is not registered. Define a config class carrying the
    # placeholder name/label so the script still exposes `YourAppConfig`.
    #
    # Do NOT call `apps.apps_ready.connect(...)` here: `apps.apps_ready` is a
    # plain boolean flag on the registry, not a signal, so that call can only
    # raise AttributeError. LargeModel declares an explicit Meta.app_label,
    # so no further registration is attempted.
    class YourAppConfig(AppConfig):
        name = 'your_app_name'
        label = 'your_app_name'

# Example usage
from chunkator import chunkator

# To make the model usable, typically in a real Django project,
# you would have migrations and a database.
# For this quickstart, we'll bypass real DB creation and just demonstrate iteration.
# In a real scenario, you'd have LargeModel.objects.all()

# Simulate a QuerySet
class MockQuerySet:
    """Tiny in-memory stand-in for a QuerySet, backed by a plain iterable.

    Only the handful of methods used by this example are provided.
    NOTE(review): the real ``chunkator`` may rely on QuerySet features that
    this mock does not implement (e.g. filtering or slicing) -- confirm
    against the installed version before relying on it.
    """

    def __init__(self, data):
        """Keep a reference to *data*, the items this mock will yield."""
        self._data = data

    def all(self):
        """Return this same object, mimicking ``Manager.all()``."""
        return self

    def __iter__(self):
        """Yield the wrapped items one by one, in their stored order."""
        for entry in self._data:
            yield entry

    def order_by(self, *args, **kwargs):
        """Accept any ordering arguments and return this object unchanged."""
        # For demonstration, ignore ordering
        return self

    def values(self, *fields):
        """Return a new mock whose items are dicts limited to *fields*.

        Raises:
            ValueError: if neither ``'pk'`` nor ``'id'`` is requested.
        """
        if not ('pk' in fields or 'id' in fields):
            raise ValueError("Mock MissingPkFieldException: 'pk' must be included in values() call.")

        rows = []
        for record in self._data:
            row = {}
            for column in fields:
                if hasattr(record, column):
                    row[column] = getattr(record, column)
                elif column == 'pk':
                    # No `pk` attribute on the item: fall back to its `id`.
                    row[column] = record.id
                else:
                    # Unknown attribute: expose it as None.
                    row[column] = None
            rows.append(row)
        return MockQuerySet(rows)

# Build 100 LargeModel instances with ids 1..100 and wrap them in the mock
# queryset.
mock_data = []
for i in range(1, 101):
    # name is 'Item <i>' and value is ten times the id.
    obj = LargeModel(id=i, name=f'Item {i}', value=i*10)
    obj.pk = i # Ensure pk is set for mock
    mock_data.append(obj)

# Wrap the list so it exposes the QuerySet-like methods defined on MockQuerySet.
mock_queryset = MockQuerySet(mock_data)

# First pass: hand the mock queryset to chunkator with a chunk size of 20.
# NOTE(review): chunkator is written for real Django QuerySets and may need
# features the mock lacks -- confirm this runs as-is, or substitute
# LargeModel.objects.all() in a real project.
print("Iterating with chunkator (objects):")
for item in chunkator(mock_queryset, 20):
    # In a real Django app, item would be a model instance
    # print(f"Processing item: {item.name}")
    pass # do something with item
print("Done iterating objects.")

# Second pass: same idea over a values() projection with a chunk size of 25.
# 'pk' is requested explicitly alongside 'name'.
print("\nIterating with chunkator (values with pk):")
for item_dict in chunkator(mock_queryset.values('pk', 'name'), 25):
    # item_dict is a dictionary if .values() is used
    # print(f"Processing item dict: {item_dict['name']}")
    pass # do something with item_dict
print("Done iterating values.")

view raw JSON →