{"id":"pinecone-ingest-and-query","version":"1.0.0","primitive":"tool_calling/api","description":"Ingest documents into Pinecone serverless and query by similarity","registry_refs":["pinecone","sentence-transformers"],"tags":["pinecone","vector-store","embeddings","semantic-search","retrieval","rag","serverless"],"solves":["legacy pinecone.init() pattern removed in v3","wrong index creation for serverless vs pod","upsert batching missing","query without namespace","dimension mismatch on upsert"],"auth_required":true,"verified":true,"last_verified":"2026-04-14","next_check":"2026-07-14","eval_result":null,"eval_env":null,"mast":["FM-1.1","FM-2.2","FM-2.4","FM-2.6","FM-3.2","FM-3.3"],"ref":"https://arxiv.org/abs/2503.13657","executable":"# ============================================\n# checklist:     pinecone-ingest-and-query\n# version:       1.0.0\n# primitive:     tool_calling/api\n# description:   Ingest documents into Pinecone serverless and query by similarity\n# registry_refs: pinecone, sentence-transformers\n# auth_required: true\n# verified:      true\n# last_verified: 2026-04-14\n# next_check:    2026-07-14\n# eval_result:   null\n# eval_env:      null\n#\n# tags:     pinecone, vector-store, embeddings, semantic-search, retrieval, rag, serverless\n# solves:   legacy pinecone.init() pattern removed in v3, wrong index creation for serverless vs pod, upsert batching missing, query without namespace, dimension mismatch on upsert\n# env_vars: PINECONE_API_KEY\n# mast:     FM-1.1, FM-2.2, FM-2.4, FM-2.6, FM-3.2, FM-3.3\n# ref:      https://arxiv.org/abs/2503.13657\n#\n# INPUTS:\n#   PINECONE_API_KEY — string, Pinecone API key (env var, required — https://app.pinecone.io)\n#   INDEX_NAME       — string, Pinecone index name (default: \"checklist-day-test\")\n#   NAMESPACE        — string, Pinecone namespace (default: \"checklist-test\")\n#   EMBEDDING_MODEL  — string, sentence-transformers model (default: \"all-MiniLM-L6-v2\")\n#\n# OUTPUTS:\n#   index_ready          — bool, index reached ready state within timeout\n#   vectors_upserted     — int, number of vectors confirmed in namespace\n#   top_match_verified   — bool, query returned doc1 as top result\n#   similarity_score     — float, cosine similarity of top match (expected > 0.7)\n#   index_count_verified — bool, final vector count matches documents ingested\n# ============================================\n\nimport os\nimport sys\nimport time\nimport subprocess\nimport requests as _requests\n\n# ----------------------------------------\n# ENV_CHECK\n# all credentials verified before any execution\n# agent must confirm these are present before proceeding\n# ----------------------------------------\n\nREQUIRED_ENV = {\n    \"PINECONE_API_KEY\": \"Pinecone API key — https://app.pinecone.io (free tier, no credit card)\",\n}\n\nmissing = [k for k in REQUIRED_ENV if not os.environ.get(k)]\nif missing:\n    print(\"ENV_CHECK: missing required credentials:\")\n    for k in missing:\n        print(f\"  {k} — {REQUIRED_ENV[k]}\")\n    sys.exit(1)\n\nprint(\"ENV_CHECK: credentials verified ✓\")\nfor k in REQUIRED_ENV:\n    v = os.environ[k]\n    print(f\"  {k}: ...{v[-4:]}\")\n\n# ----------------------------------------\n# PRE_EXECUTION\n# FM-2.2: fetch ground truth for all registry_refs\n# ----------------------------------------\n\nREGISTRY_REFS = [\"pinecone\", \"sentence-transformers\"]\nMAX_RETRIES = 2\nregistries = {}\n\nfor lib in REGISTRY_REFS:\n    for attempt in range(MAX_RETRIES):\n        try:\n            response = _requests.get(\n                f\"https://checklist.day/api/registry/{lib}\",\n                timeout=10\n            )\n            if response.status_code == 200:\n                registries[lib] = response.json()\n                break\n        except _requests.exceptions.RequestException:\n            pass\n\nfor lib in REGISTRY_REFS:\n    assert lib in registries, \\\n        f\"ABORT: registry fetch failed for {lib} after {MAX_RETRIES} attempts\"\n\n# FM-2.4: surface breaking warnings — do not withhold\nfor lib, registry in registries.items():\n    breaking = [\n        w for w in registry.get(\"warnings\", [])\n        if w.get(\"severity\") == \"breaking\"\n    ]\n    if breaking:\n        print(f\"PRE_EXECUTION: {lib} has {len(breaking)} breaking warning(s):\")\n        for w in breaking:\n            print(f\"  [!] [{w.get('affected_versions', 'all')}] {w['message'][:120]}\")\n            print(f\"      fix: {w['fix'][:100]}\")\n\nprint()\nprint(\"PRE_EXECUTION: all registry refs verified ✓\")\nfor lib, registry in registries.items():\n    install = registry.get(\"install\", [{}])[0].get(\"cmd\", \"unknown\")\n    print(f\"  {lib:25s} : {install}\")\n\nPINECONE_API_KEY = os.environ[\"PINECONE_API_KEY\"]  # guaranteed present by ENV_CHECK\n\n# ----------------------------------------\n# KNOWN FAILURE MODES\n#\n# 1. pinecone.init() — REMOVED in v3. The old pattern:\n#       import pinecone\n#       pinecone.init(api_key=..., environment=...)  # BREAKS in v3+\n#    Correct pattern: Pinecone(api_key=...)\n#\n# 2. Serverless index creation requires cloud + region, not environment string:\n#       ServerlessSpec(cloud=\"aws\", region=\"us-east-1\")  # correct\n#       NOT: PodSpec(environment=\"us-east1-gcp\")          # pod-based, different billing\n#\n# 3. Dimension mismatch — embedding model output dim must match index dim exactly.\n#    all-MiniLM-L6-v2 = 384 dims. text-embedding-ada-002 = 1536 dims.\n#    Mismatch causes silent failures or explicit errors on upsert.\n#\n# 4. Upsert without batching — Pinecone recommends max 100 vectors per upsert.\n#    Large batches without chunking cause timeouts.\n#\n# 5. Query immediately after upsert — Pinecone is eventually consistent.\n#    Always wait for index stats to confirm upsert before querying.\n#\n# 6. Missing namespace — queries without namespace return results from all namespaces.\n#    Always be explicit.\n# ----------------------------------------\n\nEMBEDDING_MODEL = \"all-MiniLM-L6-v2\"  # 384 dims, runs locally, no API key needed\nEMBEDDING_DIM = 384\nINDEX_NAME = \"checklist-day-test\"\nNAMESPACE = \"checklist-test\"\nUPSERT_BATCH_SIZE = 100\nINDEX_WAIT_TIMEOUT = 60  # seconds\n\n\n# ----------------------------------------\n# EXECUTION\n# FM-2.6: use Pinecone(api_key=...) — not pinecone.init()\n# FM-1.1: create_index with get_or_create pattern — idempotent\n# ----------------------------------------\n\ntry:\n    from pinecone import Pinecone, ServerlessSpec\nexcept ImportError:\n    pkg = registries[\"pinecone\"][\"install\"][0][\"cmd\"].replace(\"pip install \", \"\").strip()\n    print(f\"\\nEXECUTION: pinecone not found — installing {pkg}...\")\n    subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", pkg])\n    print(f\"EXECUTION: {pkg} installed ✓\")\n    from pinecone import Pinecone, ServerlessSpec\n\ntry:\n    from sentence_transformers import SentenceTransformer\nexcept ImportError:\n    pkg = registries[\"sentence-transformers\"][\"install\"][0][\"cmd\"].replace(\"pip install \", \"\").strip()\n    print(f\"\\nEXECUTION: sentence-transformers not found — installing {pkg}...\")\n    subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", pkg])\n    print(f\"EXECUTION: {pkg} installed ✓\")\n    from sentence_transformers import SentenceTransformer\n\nprint()\nprint(\"EXECUTION: initializing Pinecone client...\")\n\n# FM-2.6: correct v3+ init — not pinecone.init()\npc = Pinecone(api_key=PINECONE_API_KEY)\n\n# FM-1.1: get_or_create pattern — safe on retry\nexisting_indexes = [i.name for i in pc.list_indexes()]\nif INDEX_NAME not in existing_indexes:\n    print(f\"EXECUTION: creating serverless index '{INDEX_NAME}'...\")\n    pc.create_index(\n        name=INDEX_NAME,\n        dimension=EMBEDDING_DIM,       # FM-2.6: must match embedding model exactly\n        metric=\"cosine\",\n        spec=ServerlessSpec(            # FM-2.6: serverless requires ServerlessSpec, not PodSpec\n            cloud=\"aws\",\n            region=\"us-east-1\"\n        )\n    )\n    print(f\"EXECUTION: index created ✓\")\nelse:\n    print(f\"EXECUTION: index '{INDEX_NAME}' already exists ✓\")\n\n# Wait for index to be ready\nprint(\"EXECUTION: waiting for index to be ready...\")\ndeadline = time.time() + INDEX_WAIT_TIMEOUT\nwhile time.time() < deadline:\n    status = pc.describe_index(INDEX_NAME).status\n    if status.get(\"ready\"):\n        break\n    time.sleep(2)\nelse:\n    raise TimeoutError(\n        f\"ABORT: index '{INDEX_NAME}' not ready after {INDEX_WAIT_TIMEOUT}s\"\n    )\n\nprint(\"EXECUTION: index ready ✓\")\n\nindex = pc.Index(INDEX_NAME)\n\n# Load embedding model\nprint(f\"EXECUTION: loading embedding model '{EMBEDDING_MODEL}'...\")\nmodel = SentenceTransformer(EMBEDDING_MODEL)\nprint(\"EXECUTION: model loaded ✓\")\n\n# Documents to ingest\ndocuments = [\n    {\"id\": \"doc1\", \"text\": \"checklist.day is a machine-readable registry for AI agents\"},\n    {\"id\": \"doc2\", \"text\": \"Pinecone is a managed vector database for production workloads\"},\n    {\"id\": \"doc3\", \"text\": \"RAG retrieval augmented generation improves LLM accuracy\"},\n]\n\n# Embed\nprint(f\"EXECUTION: embedding {len(documents)} documents...\")\nembeddings = model.encode([d[\"text\"] for d in documents])\n\n# FM-1.1: upsert in batches — idempotent, same IDs overwrite\nvectors = [\n    {\n        \"id\": doc[\"id\"],\n        \"values\": emb.tolist(),\n        \"metadata\": {\"text\": doc[\"text\"]}\n    }\n    for doc, emb in zip(documents, embeddings)\n]\n\n# Upsert in batches of UPSERT_BATCH_SIZE\nfor i in range(0, len(vectors), UPSERT_BATCH_SIZE):\n    batch = vectors[i:i + UPSERT_BATCH_SIZE]\n    index.upsert(vectors=batch, namespace=NAMESPACE)\n\nprint(f\"EXECUTION: {len(vectors)} vectors upserted ✓\")\n\n# FM-3.2: wait for upsert to be reflected in stats before querying\n# Pinecone is eventually consistent — do not query immediately\nprint(\"EXECUTION: waiting for upsert to be indexed...\")\ndeadline = time.time() + 30\nwhile time.time() < deadline:\n    stats = index.describe_index_stats()\n    ns = stats.namespaces.get(NAMESPACE)\n    ns_count = ns.vector_count if ns else 0\n    if ns_count >= len(documents):\n        break\n    time.sleep(2)\nelse:\n    raise TimeoutError(\n        f\"ABORT: upserted {len(documents)} vectors but only {ns_count} visible after 30s\"\n    )\n\nprint(f\"EXECUTION: {ns_count} vectors confirmed in namespace '{NAMESPACE}' ✓\")\n\n# Query\nquery_text = \"machine readable registry for agents\"\nquery_embedding = model.encode([query_text])[0].tolist()\n\nprint(f\"\\nEXECUTION: querying — '{query_text}'\")\nresults = index.query(\n    vector=query_embedding,\n    top_k=1,\n    namespace=NAMESPACE,           # FM-2.6: always specify namespace\n    include_metadata=True\n)\n\nmatches = results.matches  # Pinecone v3 returns QueryResponse object, not plain dict\n\nprint(f\"EXECUTION: query complete ✓\")\nprint(f\"  top match: {matches[0].metadata['text']}\")\nprint(f\"  score    : {matches[0].score:.4f}\")\n\n# ----------------------------------------\n# POST_EXECUTION\n# FM-3.2: verify match count before asserting\n# FM-3.3: exact match on expected top result\n# ----------------------------------------\n\nassert len(matches) == 1, \\\n    f\"FAIL: expected 1 match, got {len(matches)}\"\n\ntop_match = matches[0]\n\nassert top_match.id == \"doc1\", \\\n    f\"FAIL: expected top match 'doc1', got '{top_match.id}'\"\n\nassert top_match.score > 0.7, \\\n    f\"FAIL: similarity score too low — expected >0.7, got {top_match.score:.4f}\"\n\nassert top_match.metadata[\"text\"] == documents[0][\"text\"], \\\n    f\"FAIL: metadata text mismatch\"\n\n# Verify index stats\nstats = index.describe_index_stats()\nfinal_ns = stats.namespaces.get(NAMESPACE)\nfinal_count = final_ns.vector_count if final_ns else 0\nassert final_count == len(documents), \\\n    f\"FAIL: expected {len(documents)} vectors in index, got {final_count}\"\n\nprint()\nprint(\"POST_EXECUTION: match count verified ✓\")\nprint(f\"POST_EXECUTION: top match is doc1 ✓  (score={top_match.score:.4f})\")\nprint(\"POST_EXECUTION: metadata verified ✓\")\nprint(f\"POST_EXECUTION: index count verified ✓  ({final_count}/{len(documents)} vectors)\")\n\nresult = {\n    \"status\": \"pass\",\n    \"index_ready\": True,\n    \"vectors_upserted\": ns_count,\n    \"top_match_verified\": True,\n    \"similarity_score\": float(top_match.score),\n    \"index_count_verified\": True,\n}\nprint(result)\nprint(\"PASS\")\n"}