import numpy as np
import matplotlib.pyplot as plt
import umap
from buildai import Client
# Script: fetch clip embeddings for a few factories, project them to 2D with
# UMAP, and save/show a scatter plot coloured by factory.
client = Client()

# Grab clips from a few factories
factory_ids = ["factory-1", "factory-2", "factory-3"]
clip_ids = []
clip_factories = []  # clip_factories[k] is the factory that clip_ids[k] came from
for fid in factory_ids:
    # NOTE(review): only one `list` call is made per factory, so at most one
    # page of results is read — confirm whether pagination is needed.
    clips = client.clips.list(factory_id=fid, page_size=50)
    for clip in clips.items:
        clip_ids.append(clip.clip_id)
        clip_factories.append(fid)

# Fetch vectors in batches of 100
BATCH_SIZE = 100
vectors = []
vector_factories = []  # factory label for each kept vector, same order as `vectors`
for i in range(0, len(clip_ids), BATCH_SIZE):
    batch_ids = clip_ids[i : i + BATCH_SIZE]
    batch_factories = clip_factories[i : i + BATCH_SIZE]
    batch = client.embeddings.batch(
        entity_ids=batch_ids,
        entity_type="clip",
        include_vector=True,
    )
    items = list(batch)
    # Assumes the response yields one item per requested ID, in request
    # order — TODO confirm against the SDK. If the counts differ we cannot
    # tell which clip each vector belongs to, so fail loudly rather than
    # plot points under the wrong factory.
    if len(items) != len(batch_ids):
        raise RuntimeError(
            f"Embedding batch returned {len(items)} items for "
            f"{len(batch_ids)} requested clips; cannot align vectors to factories"
        )
    for item, factory in zip(items, batch_factories):
        # Clips without a vector are skipped; recording the label here keeps
        # `vector_factories` aligned with `vectors` wherever the gaps fall.
        if item.vector:
            vectors.append(item.vector)
            vector_factories.append(factory)

if not vectors:
    # Nothing to project; stop with a clear message instead of an opaque
    # error from the reducer on an empty array.
    raise SystemExit("No clip embeddings were returned; nothing to plot.")

X = np.array(vectors)
labels = np.array(vector_factories)

# Project to 2D
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric="cosine")
embedding_2d = reducer.fit_transform(X)

# Plot
fig, ax = plt.subplots(figsize=(10, 8))
for fid in factory_ids:
    # Boolean mask over rows of `embedding_2d` belonging to this factory.
    mask = labels == fid
    ax.scatter(
        embedding_2d[mask, 0],
        embedding_2d[mask, 1],
        label=fid,
        s=12,
        alpha=0.7,
    )
ax.legend()
ax.set_title("Clip embeddings by factory")
plt.tight_layout()
plt.savefig("embedding_map.png", dpi=150)
plt.show()