target-jsonl

0.1.4 · active · verified Mon Apr 13

target-jsonl is a Singer.io target that writes incoming data streams into JSON Lines (.jsonl) files. It's designed to integrate seamlessly within the Singer ETL ecosystem, allowing data extracted by a 'tap' to be consumed and stored as line-delimited JSON. The current version is 0.1.4, and it has an irregular release cadence driven by contributions.

Warnings

Install

Quickstart

This quickstart demonstrates how to run `target-jsonl` by simulating a Singer tap piping data to it. It creates a configuration file, generates dummy Singer messages (SCHEMA and RECORD), pipes them to the target, and then verifies the output JSONL file. The `destination_path` and `custom_name` are configured, and the decimal precision is explicitly set.

import json
import subprocess
import os
import shutil

# Locations used by the demo: the target's config file and the folder the
# target is told to write into.
config_path = "config.json"
output_dir = "./output_jsonl"

# Remove output left behind by an earlier run so it is not mistaken for
# fresh results.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)

# Settings handed to target-jsonl via --config.
config = dict(
    destination_path=output_dir,
    custom_name="my_data_stream",
    decimal_precision=4,
)
with open(config_path, "w") as config_file:
    config_file.write(json.dumps(config))

# Dummy Singer messages standing in for a tap's output: one SCHEMA message
# describing the "users" stream, followed by one RECORD message per row.
users_schema = {
    "properties": {
        "id": {"type": ["null", "integer"]},
        "name": {"type": ["null", "string"]},
        "value": {"type": ["null", "number"]},
        "ts": {"type": ["null", "string"], "format": "date-time"},
    }
}
user_rows = [
    (1, "Alice", 100.123456, "2023-01-01T12:00:00Z"),
    (2, "Bob", 200.789, "2023-01-01T13:00:00Z"),
]

input_data = [
    {"type": "SCHEMA", "stream": "users", "schema": users_schema, "key_properties": ["id"]},
]
# Key order inside each message is kept stable so the serialized lines are
# reproducible.
input_data += [
    {
        "type": "RECORD",
        "stream": "users",
        "record": {"id": row_id, "name": name, "value": value, "ts": ts},
    }
    for row_id, name, value, ts in user_rows
]

# Simulate piping data to target-jsonl, show what it wrote, then clean up.
try:
    # Singer messages are exchanged as newline-delimited JSON, one per line.
    payload = "".join(json.dumps(message) + "\n" for message in input_data)

    # subprocess.run() feeds stdin while draining stdout/stderr, so the child
    # cannot stall on a full pipe; on timeout it kills and reaps the child
    # before raising TimeoutExpired.
    result = subprocess.run(
        ["target-jsonl", "--config", config_path],
        input=payload,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=10,
    )
except FileNotFoundError:
    print("target-jsonl executable not found; is it installed and on PATH?")
except subprocess.TimeoutExpired:
    print("target-jsonl did not finish within 10 seconds and was terminated.")
else:
    if result.returncode != 0:
        print(f"Error running target-jsonl:\n{result.stderr}")
    else:
        print(f"target-jsonl ran successfully. Output files in {output_dir}")
        # Verify output. The exact file name is chosen by the target
        # (NOTE(review): it may include a timestamp depending on the target's
        # naming options; confirm against the target-jsonl README), so
        # discover the .jsonl files rather than hard-coding a single name.
        produced = []
        if os.path.isdir(output_dir):
            produced = sorted(
                entry for entry in os.listdir(output_dir) if entry.endswith(".jsonl")
            )
        if not produced:
            print(f"No .jsonl output found in {output_dir}")
        for entry in produced:
            output_file = os.path.join(output_dir, entry)
            print(f"Content of {output_file}:")
            with open(output_file, "r", encoding="utf-8") as f:
                for line in f:
                    print(line.strip())
finally:
    # Clean up the files this demo created, whether or not the run succeeded.
    os.remove(config_path)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)

view raw JSON →