# openai-embeddings
# Requires an OpenAI API key (starts with "sk-") in the OPENAI_API_KEY environment variable.
import sys
import os
import subprocess
import time
import urllib.request
import json
import math
# ─────────────────────────────────────────
# PRE_EXECUTION
# ─────────────────────────────────────────
# Fetch the registry entry for "openai" before doing any real work.
# Two attempts are made, 2 seconds apart; if the second one also fails the
# script prints an ABORT line and exits with status 1.
for attempt in range(2):
    try:
        req = urllib.request.Request(
            "https://checklist.day/api/registry/openai",
            headers={"User-Agent": "checklist-agent/1.0"},
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            registry = json.loads(resp.read())
    except Exception as e:
        # Script-level boundary: network, HTTP and JSON decoding failures are
        # all treated the same way ("registry unreachable").
        if attempt == 1:
            print(f"ABORT: registry unreachable — {e}")
            sys.exit(1)
        time.sleep(2)
    else:
        break

# The payload must be a JSON object for `.get` to work; a list/string/number
# would otherwise surface as an uncaught AttributeError.
if not isinstance(registry, dict):
    print(f"ABORT: registry returned {type(registry).__name__}, expected a JSON object")
    sys.exit(1)

# "warnings" may be a list or a single value; normalise to a list for printing.
warnings = registry.get("warnings", [])
if warnings:
    print("[openai] WARNINGS:")
    for w in warnings if isinstance(warnings, list) else [warnings]:
        print(f" ⚠ {w}")
# ─────────────────────────────────────────
# EXECUTION
# ─────────────────────────────────────────
# Install the SDK with the running interpreter's pip, then import it. The
# import has to follow the install, which is why it is not at the top of the file.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "openai>=1.0.0"])
from openai import OpenAI

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    print("ABORT: OPENAI_API_KEY env var not set")
    sys.exit(1)
client = OpenAI(api_key=OPENAI_API_KEY)

# FOOTGUN: model name is "text-embedding-3-small", not "text-embedding-ada-002" (legacy)
# FOOTGUN: text-embedding-3-small = 1536 dims, text-embedding-3-large = 3072 dims
MODEL = "text-embedding-3-small"
EXPECTED_DIMS = 1536

texts = [
    "The cat sat on the mat",
    "A feline rested on a rug",  # semantically similar
    "The stock market crashed",  # semantically different
]

# FOOTGUN: batch limit is 2048 inputs per request — don't exceed
# FOOTGUN: pass a list of strings — a bare string is treated as ONE input and
# yields a single embedding, so response.data would not line up with `texts`
response = client.embeddings.create(
    model=MODEL,
    input=texts,
)

embeddings = [item.embedding for item in response.data]
# Guard the empty-response case: report 0 dimensions so the dimension check in
# POST_EXECUTION fails with its own message instead of an IndexError here.
dimensions = len(embeddings[0]) if embeddings else 0
batch_size = len(response.data)

print(f" model: {response.model}")
print(f" dimensions: {dimensions}")
print(f" batch size: {batch_size}")
def cosine_similarity(a: list, b: list) -> float:
    """Return the cosine similarity of two numeric vectors.

    Args:
        a: First vector, a list of numbers.
        b: Second vector, a list of numbers.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their
        Euclidean norms, or ``0.0`` when either norm is zero (including
        empty input).

    Note:
        Elements are paired with ``zip``, so if the lengths differ the dot
        product only covers the shorter length while each norm still uses
        the full vector.
    """
    inner = sum(p * q for p, q in zip(a, b))
    norm_a = math.sqrt(sum(p * p for p in a))
    norm_b = math.sqrt(sum(q * q for q in b))
    # A zero-length vector has no direction; define the similarity as 0.0
    # rather than dividing by zero.
    if not (norm_a and norm_b):
        return 0.0
    return inner / (norm_a * norm_b)
# The similarity comparison indexes embeddings[1] and embeddings[2], so verify
# the batch size first: a short response then fails with a clear message
# instead of an IndexError. Explicit raises are used instead of `assert` so the
# checks still run under `python -O`; AssertionError is kept as the failure type.
if batch_size != len(texts):
    raise AssertionError(f"FAIL: expected {len(texts)} embeddings, got {batch_size}")

# texts[0] and texts[1] are paraphrases; texts[2] is an unrelated topic, so
# the first pair is expected to score higher than the second.
sim_similar = cosine_similarity(embeddings[0], embeddings[1])
sim_different = cosine_similarity(embeddings[0], embeddings[2])
similarity_ok = sim_similar > sim_different
print(f" sim(cat/feline): {sim_similar:.3f}")
print(f" sim(cat/stocks): {sim_different:.3f}")
print(f" similarity_ok: {similarity_ok}")
# ─────────────────────────────────────────
# POST_EXECUTION
# ─────────────────────────────────────────
if dimensions != EXPECTED_DIMS:
    raise AssertionError(f"FAIL: expected {EXPECTED_DIMS} dims, got {dimensions}")
if not similarity_ok:
    raise AssertionError(
        f"FAIL: similar texts ({sim_similar:.3f}) should score higher than different ({sim_different:.3f})"
    )

# Machine-readable summary, followed by the PASS marker.
result = {
    "model": response.model,
    "dimensions": dimensions,
    "batch_size": batch_size,
    "similarity_ok": similarity_ok,
}
print(json.dumps(result, indent=2))
print("PASS")