target-jsonl
target-jsonl is a Singer.io target that writes incoming data streams to JSON Lines (.jsonl) files. It is designed to integrate seamlessly with the Singer ETL ecosystem, allowing data extracted by a 'tap' to be consumed and stored as line-delimited JSON. The current version is 0.1.4; releases follow an irregular cadence driven by contributions.
Warnings
- Gotcha: Prior to version 0.1.4, `target-jsonl` did not automatically create the `destination_path` directory if it did not already exist. Runs failed with an error unless the target directory had been created manually beforehand.
- Gotcha: Version 0.1.3 introduced an adjustment to decimal precision (defaulting to 2). If your pipeline implicitly relied on a different precision, or on numeric values not being rounded at all, this change could alter your data. You can set `decimal_precision` to a higher value, or to 'None' to disable rounding.
- Gotcha: As a file-based target, `target-jsonl` requires that the user running the process has write permission on the specified `destination_path`. Insufficient permissions will result in runtime errors.
Install
- `pip install target-jsonl`
Quickstart
import json
import subprocess
import os
import shutil
# Quickstart: write a config file, feed a few hand-written Singer messages to
# the `target-jsonl` executable over stdin, print whatever .jsonl files it
# produced, then remove the config file and the output directory again.


def encode_singer_messages(messages):
    """Serialize Singer messages as newline-delimited JSON.

    Args:
        messages: Iterable of JSON-serializable message dicts.

    Returns:
        A string with one JSON document per line, each terminated by a
        newline; an empty string when ``messages`` is empty.
    """
    return "".join(json.dumps(message) + "\n" for message in messages)


# Create a dummy config file
config_path = "config.json"
output_dir = "./output_jsonl"
# Remove leftovers from an earlier run so stale files are not reported as new output.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
config = {
    "destination_path": output_dir,
    "custom_name": "my_data_stream",
    "decimal_precision": 4,
}
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(config, f)

# Create dummy input data (mimicking a tap's output)
input_data = [
    {
        "type": "SCHEMA",
        "stream": "users",
        "schema": {
            "properties": {
                "id": {"type": ["null", "integer"]},
                "name": {"type": ["null", "string"]},
                "value": {"type": ["null", "number"]},
                "ts": {"type": ["null", "string"], "format": "date-time"},
            }
        },
        "key_properties": ["id"],
    },
    {
        "type": "RECORD",
        "stream": "users",
        "record": {"id": 1, "name": "Alice", "value": 100.123456, "ts": "2023-01-01T12:00:00Z"},
    },
    {
        "type": "RECORD",
        "stream": "users",
        "record": {"id": 2, "name": "Bob", "value": 200.789, "ts": "2023-01-01T13:00:00Z"},
    },
]

# Simulate piping data to target-jsonl
try:
    # subprocess.run() feeds stdin while draining stdout/stderr, so the child
    # cannot block on a full pipe, and it kills the child if the timeout
    # expires. Writing to stdin by hand and then calling communicate() offers
    # neither guarantee.
    result = subprocess.run(
        ["target-jsonl", "--config", config_path],
        input=encode_singer_messages(input_data),
        capture_output=True,
        text=True,
        timeout=10,
        check=False,
    )
except FileNotFoundError:
    print(
        "Error running target-jsonl:\n"
        "executable not found on PATH (install it with `pip install target-jsonl`)"
    )
except subprocess.TimeoutExpired as exc:
    print(f"Error running target-jsonl:\ntimed out after {exc.timeout} seconds")
else:
    if result.returncode != 0:
        print(f"Error running target-jsonl:\n{result.stderr}")
    else:
        print(f"target-jsonl ran successfully. Output files in {output_dir}")
        # Verify output
        # NOTE(review): the name target-jsonl gives its output file appears to
        # depend on its configuration (confirm against its docs), so list
        # every .jsonl file that was produced instead of hard-coding one name.
        output_files = []
        if os.path.isdir(output_dir):
            output_files = sorted(
                name for name in os.listdir(output_dir) if name.endswith(".jsonl")
            )
        if not output_files:
            print(f"Output file not found in {output_dir}")
        for name in output_files:
            output_file = os.path.join(output_dir, name)
            print(f"Content of {output_file}:")
            with open(output_file, "r", encoding="utf-8") as f:
                for line in f:
                    print(line.strip())
finally:
    # Clean up
    os.remove(config_path)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)