{"id":"vector-store-ingest-and-query","version":"1.0.0","primitive":"code_execution","description":"Ingest documents into a vector store and query them by similarity","registry_refs":["chromadb"],"tags":["chromadb","vector-store","embeddings","semantic-search","retrieval","rag"],"solves":["deprecated Client(Settings()) pattern","non-idempotent collection creation on retry"],"auth_required":false,"verified":true,"last_verified":"2026-04-13","next_check":"2026-07-13","eval_result":"pass","eval_env":"python3.12/linux","mast":["FM-1.1","FM-2.2","FM-2.6","FM-3.2","FM-3.3"],"ref":"https://arxiv.org/abs/2503.13657","inputs":[],"executable":"# ============================================\n# checklist:     vector-store-ingest-and-query\n# version:       1.0.0\n# primitive:     code_execution\n# description:   Ingest documents into a vector store and query them by similarity\n# registry_refs: chromadb\n# auth_required: false\n# verified:      true\n# last_verified: 2026-04-13\n# next_check:    2026-07-13\n# eval_result:   pass\n# eval_env:      python3.12/linux\n#\n# tags:   chromadb, vector-store, embeddings, semantic-search, retrieval, rag\n# solves: deprecated Client(Settings()) pattern, non-idempotent collection creation on retry\n# mast: FM-1.1, FM-2.2, FM-2.6, FM-3.2, FM-3.3\n# ref:  https://arxiv.org/abs/2503.13657\n#\n# INPUTS:\n#   COLLECTION_NAME — string, ChromaDB collection name (default: \"checklist_test\")\n#   TEST_DOCUMENT   — string, document to ingest and query (default: \"hello world\")\n#   TEST_DOC_ID     — string, document ID (default: \"doc1\")\n#\n# OUTPUTS:\n#   document_ingested    — bool, document successfully added to collection\n#   count_verified       — bool, collection count matches expected\n#   exact_match_verified — bool, query returned exact document match\n# ============================================\n\nimport shlex\nimport sys\nimport subprocess\nimport requests\n\n# ----------------------------------------\n# INPUTS\n# single source of truth for the values documented in the header —\n# ingest, query and verification below all read these constants\n# ----------------------------------------\n\nCOLLECTION_NAME = \"checklist_test\"\nTEST_DOCUMENT = \"hello world\"\nTEST_DOC_ID = \"doc1\"\n\n# ----------------------------------------\n# PRE_EXECUTION\n# FM-2.2: hard URL — no ambiguity, no hallucination\n# agent fetches ground truth before any execution\n# abort only if registry is truly unreachable\n# ----------------------------------------\n\nMAX_RETRIES = 2\nregistry = None\nlast_error = \"unknown\"\n\nfor attempt in range(MAX_RETRIES):\n    try:\n        response = requests.get(\n            \"https://checklist.day/api/registry/chromadb\",\n            timeout=10\n        )\n        if response.status_code == 200:\n            payload = response.json()\n            if isinstance(payload, dict):\n                registry = payload\n                break\n            # later checks call registry.get(), so anything but an object is unusable\n            last_error = \"response body was not a JSON object\"\n        else:\n            # remember why this attempt failed so the abort message is actionable\n            last_error = f\"HTTP {response.status_code}\"\n    except (requests.exceptions.RequestException, ValueError) as exc:\n        # ValueError covers a 200 response whose body is not valid JSON\n        last_error = f\"{type(exc).__name__}: {exc}\"\n\nassert registry is not None, \\\n    f\"ABORT: registry fetch failed after {MAX_RETRIES} attempts — check network (last error: {last_error})\"\n\nassert registry.get(\"imports\"), \\\n    \"ABORT: imports field missing from registry\"\nassert registry.get(\"install\"), \\\n    \"ABORT: install field missing from registry\"\n# an empty warnings list is valid (nothing to report) — only a missing\n# or non-list field means the registry entry is malformed\nassert isinstance(registry.get(\"warnings\"), list), \\\n    \"ABORT: warnings field missing from registry\"\n\n# FM-2.4: surface breaking warnings — do not withhold\nbreaking = [\n    w for w in registry[\"warnings\"]\n    if isinstance(w, dict) and w.get(\"severity\") == \"breaking\"\n]\nif breaking:\n    print(f\"PRE_EXECUTION: {len(breaking)} breaking warning(s):\")\n    for w in breaking:\n        # .get with defaults — a warning lacking message/fix is still surfaced\n        versions = w.get(\"affected_versions\", \"all\")\n        message = str(w.get(\"message\", \"\"))[:120]\n        fix = str(w.get(\"fix\", \"\"))[:100]\n        print(f\"  [!] [{versions}] {message}\")\n        print(f\"      fix: {fix}\")\n\nprint()\nprint(\"PRE_EXECUTION: registry verified ✓\")\nprint(f\"  install : {registry['install'][0]['cmd']}\")\nprint(f\"  symbol  : {registry['imports'][0]['symbol']}\")\n\n# ----------------------------------------\n# EXECUTION\n# FM-2.6: use correct client per registry imports\n# FM-1.1: get_or_create_collection is idempotent\n# philosophy: get it done — auto-install, no asking\n# ----------------------------------------\n\ntry:\n    import chromadb\nexcept ImportError:\n    # NOTE(review): the install target is supplied by the remote registry;\n    # it reaches pip as an argument list (no shell), but is otherwise trusted\n    install_args = shlex.split(registry['install'][0]['cmd'])\n    # drop a leading \"pip install\" / \"pip3 install\" — pip is invoked via sys.executable\n    if install_args[:1] in ([\"pip\"], [\"pip3\"]) and install_args[1:2] == [\"install\"]:\n        install_args = install_args[2:]\n    assert install_args, \\\n        \"ABORT: install command in registry names no package\"\n    pkg = \" \".join(install_args)\n    print(f\"\\nEXECUTION: chromadb not found — installing {pkg}...\")\n    subprocess.check_call(\n        [sys.executable, \"-m\", \"pip\", \"install\", *install_args]\n    )\n    print(f\"EXECUTION: {pkg} installed ✓\")\n    import chromadb\n\n# FM-2.6: EphemeralClient is correct — per registry imports\nclient = chromadb.EphemeralClient()\n\n# FM-1.1: get_or_create is idempotent — safe on retry\ncollection = client.get_or_create_collection(\n    name=COLLECTION_NAME\n)\n\ncollection.add(\n    documents=[TEST_DOCUMENT],\n    ids=[TEST_DOC_ID]\n)\n\nprint()\nprint(\"EXECUTION: document ingested ✓\")\n\n# ----------------------------------------\n# POST_EXECUTION\n# FM-3.2: verify count before querying\n# FM-3.3: exact match — 100% == PASS\n# ----------------------------------------\n\ncount = collection.count()\nassert count == 1, \\\n    f\"FAIL: document count mismatch — expected 1, got {count}\"\n\nresults = collection.query(\n    query_texts=[TEST_DOCUMENT],\n    n_results=1\n)\n\nreturned_doc = results[\"documents\"][0][0]\nreturned_id = results[\"ids\"][0][0]\n\nassert returned_doc == TEST_DOCUMENT, \\\n    f\"FAIL: expected '{TEST_DOCUMENT}', got '{returned_doc}'\"\n\nassert returned_id == TEST_DOC_ID, \\\n    f\"FAIL: expected '{TEST_DOC_ID}', got '{returned_id}'\"\n\nprint()\nprint(\"POST_EXECUTION: count verified ✓\")\nprint(\"POST_EXECUTION: exact match verified ✓\")\n\nresult = {\n    \"status\": \"pass\",\n    \"document_ingested\": True,\n    \"count_verified\": True,\n    \"exact_match_verified\": True,\n}\nprint(result)\nprint(\"PASS\")\n"}