vector-store-ingest-and-query
Ingest documents into a vector store and query them by similarity
import sys
import subprocess
import requests
# ----------------------------------------
# PRE_EXECUTION
# FM-2.2: hard URL — no ambiguity, no hallucination
# agent fetches ground truth before any execution
# abort only if registry is truly unreachable
#
# Fetches the chromadb registry entry, validates the fields used later in
# this script ("imports", "install", "warnings"), and prints any warnings
# whose severity is "breaking".
# Checks raise AssertionError explicitly instead of using `assert`, so they
# still run under `python -O`.
# ----------------------------------------
MAX_RETRIES = 2
registry = None
last_error = None  # most recent failure reason, reported if every attempt fails
for attempt in range(MAX_RETRIES):
    try:
        response = requests.get(
            "https://checklist.day/api/registry/chromadb",
            timeout=10,
        )
    except requests.exceptions.RequestException as exc:
        # Request failed before a response was obtained: remember why, retry.
        last_error = f"{type(exc).__name__}: {exc}"
        continue
    if response.status_code != 200:
        last_error = f"HTTP {response.status_code}"
        continue
    try:
        registry = response.json()
    except ValueError as exc:
        # Body was not valid JSON; treat it like any other failed attempt.
        last_error = f"invalid JSON: {exc}"
        continue
    break

if registry is None:
    detail = f" (last error: {last_error})" if last_error else ""
    raise AssertionError(
        f"ABORT: registry fetch failed after {MAX_RETRIES} attempts"
        f" — check network{detail}"
    )
if not isinstance(registry, dict):
    raise AssertionError(
        f"ABORT: registry payload is {type(registry).__name__}, expected a JSON object"
    )
# "imports" and "install" are indexed with [0] below, so they must be non-empty.
for required in ("imports", "install"):
    if not registry.get(required):
        raise AssertionError(f"ABORT: {required} field missing from registry")
# "warnings" only has to be present: an empty list means nothing to report.
if registry.get("warnings") is None:
    raise AssertionError("ABORT: warnings field missing from registry")

# FM-2.4: surface breaking warnings — do not withhold
breaking = [
    w for w in registry["warnings"]
    if w.get("severity") == "breaking"
]
if breaking:
    print(f"PRE_EXECUTION: {len(breaking)} breaking warning(s):")
    for w in breaking:
        # Tolerate warnings that lack "message"/"fix" so one malformed entry
        # cannot hide the remaining ones behind a KeyError.
        message = str(w.get("message") or "")
        fix = str(w.get("fix") or "")
        print(f" [!] [{w.get('affected_versions', 'all')}] {message[:120]}")
        print(f" fix: {fix[:100]}")
print()
print("PRE_EXECUTION: registry verified ✓")
print(f" install : {registry['install'][0]['cmd']}")
print(f" symbol : {registry['imports'][0]['symbol']}")
# ----------------------------------------
# EXECUTION
# FM-2.6: use correct client per registry imports
# FM-1.1: get_or_create_collection is idempotent
# philosophy: get it done — auto-install, no asking
#
# Imports chromadb (installing it with pip first when the import fails),
# creates an in-memory client and the "checklist_test" collection, and
# ingests one document.
# ----------------------------------------
try:
    import chromadb
except ImportError:
    import shlex

    # NOTE(security): the install command comes from the remote registry and
    # is trusted as-is; whatever it names is handed to pip.
    pkg = registry['install'][0]['cmd'].replace("pip install ", "").strip()
    # Tokenise so that several requirements or flags are passed to pip as
    # separate argv entries rather than one string containing spaces.
    pip_args = shlex.split(pkg)
    if not pip_args:
        raise RuntimeError(
            "ABORT: registry install command names no package to install"
        )
    print(f"\nEXECUTION: chromadb not found — installing {pkg}...")
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", *pip_args]
    )
    print(f"EXECUTION: {pkg} installed ✓")
    import chromadb

# FM-2.6: EphemeralClient is correct — per registry imports
client = chromadb.EphemeralClient()
# FM-1.1: get_or_create is idempotent — safe on retry
collection = client.get_or_create_collection(
    name="checklist_test"
)
collection.add(
    documents=["hello world"],
    ids=["doc1"],
)
print()
print("EXECUTION: document ingested ✓")
# ----------------------------------------
# POST_EXECUTION
# FM-3.2: verify count before querying
# FM-3.3: exact match — 100% == PASS
#
# Confirms the collection holds exactly one document, queries it with the
# ingested text, and checks that the top hit is that document and id.
# Failures raise AssertionError explicitly instead of using `assert`, so
# `python -O` cannot strip the checks and report PASS unverified.
# ----------------------------------------
count = collection.count()
if count != 1:
    raise AssertionError(
        f"FAIL: document count mismatch — expected 1, got {count}"
    )

results = collection.query(
    query_texts=["hello world"],
    n_results=1,
)
# Read the first hit for the first (and only) query text.
returned_doc = results["documents"][0][0]
returned_id = results["ids"][0][0]
if returned_doc != "hello world":
    raise AssertionError(
        f"FAIL: expected 'hello world', got '{returned_doc}'"
    )
if returned_id != "doc1":
    raise AssertionError(
        f"FAIL: expected 'doc1', got '{returned_id}'"
    )

print()
print("POST_EXECUTION: count verified ✓")
print("POST_EXECUTION: exact match verified ✓")
# Only reached when every check above passed.
result = {
    "status": "pass",
    "document_ingested": True,
    "count_verified": True,
    "exact_match_verified": True,
}
print(result)
print("PASS")