Datasets
Released datasets are versioned collections of WebDataset shards hosted on Hugging Face. Stream them directly into your training pipeline.
List datasets
Python:
from buildai import Client

client = Client()
datasets = client.datasets.list()
for ds in datasets.items:
    print(ds.name, ds.clip_count)
curl:
curl -s https://api.data.build.ai/v1/datasets \
-H "X-API-Key: $BUILDAI_API_KEY"
Get dataset details
Python:
dataset = client.datasets.get("egocentric-100k")
print(dataset.name, dataset.clip_count)
curl:
curl -s https://api.data.build.ai/v1/datasets/egocentric-100k \
-H "X-API-Key: $BUILDAI_API_KEY"
Browse clips
View the actual video and frames before you train on anything.
from IPython.display import Image, display, Video
# Pick a clip
clips = client.clips.list(factory_id="FACTORY_ID", page_size=5)
clip = clips.items[0]
# Watch it
video = client.clips.video(clip.clip_id)
print(video.signed_url) # paste in browser, or:
display(Video(url=video.signed_url, width=600))
# See individual frames
frames = client.clips.frames(clip.clip_id, page_size=8)
for frame in frames.items:
    print(f"frame {frame.frame_index} @ {frame.timestamp_seconds}s")
    display(Image(url=frame.image_url, width=400))
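If you'd rather keep a clip on disk than preview it in a notebook, the signed URL can be fetched with any HTTP client. A minimal sketch, assuming requests is installed and the URL is downloaded before it expires:
import requests
# Download the clip via its signed URL and write it to a local file
video = client.clips.video(clip.clip_id)
resp = requests.get(video.signed_url, timeout=60)
resp.raise_for_status()
with open(f"{clip.clip_id}.mp4", "wb") as f:
    f.write(resp.content)
print(f"saved {clip.clip_id}.mp4 ({len(resp.content)} bytes)")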
WebDataset shard URLs
Each dataset is exported as .tar shards. Each shard contains MP4 clips + JSON metadata.
Python:
urls = client.datasets.webdataset_urls("egocentric-100k")
print(f"{len(urls)} shards")
# Or page through shard metadata
shards = client.datasets.list_webdataset_artifacts("egocentric-100k", page_size=10)
for shard in shards.items:
    print(shard.shard_path, shard.clip_count, shard.size_bytes)
curl:
curl -s "https://api.data.build.ai/v1/datasets/egocentric-100k/artifacts/webdataset?page_size=5" \
-H "X-API-Key: $BUILDAI_API_KEY"
Shard format
shard-000000.tar
├── clip-00000000.mp4 # H.265 video clip
├── clip-00000000.json # metadata (factory, worker, timestamps, duration)
├── clip-00000001.mp4
├── clip-00000001.json
└── ...
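Because each .mp4/.json pair shares a key, webdataset can stream a shard directly for a quick look before you commit to a full training run. A minimal sketch that inspects the first few samples of the first shard (the metadata keys printed here are illustrative; check a real JSON record for the exact field names):
import json
import webdataset as wds

urls = client.datasets.webdataset_urls("egocentric-100k")
inspect = wds.WebDataset(urls[:1])  # stream only the first shard
for i, sample in enumerate(inspect):
    meta = json.loads(sample["json"])  # still raw bytes here, since no .decode() step is applied
    print(sample["__key__"], meta.get("duration"), meta.get("factory"))
    if i >= 4:
        break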
Train: fine-tune VideoMAE on factory footage
Classify what a worker is doing — soldering, assembly, inspection, etc. Uses decord for video decoding and VideoMAE from Hugging Face.
pip install buildai-sdk webdataset decord transformers torch matplotlib scikit-learn
import io
import json
import torch
import webdataset as wds
from buildai import Client
from decord import VideoReader, cpu
from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor
# --- 1. Get shard URLs from Build AI ---
client = Client()
urls = client.datasets.webdataset_urls("egocentric-100k")
print(f"Training on {len(urls)} shards")
# --- 2. Define action labels ---
ACTIONS = ["soldering", "assembly", "inspection", "packaging", "tool_change", "idle"]
label_map = {a: i for i, a in enumerate(ACTIONS)}
# --- 3. Video decoding + preprocessing ---
processor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
def decode_sample(sample):
    video_bytes = sample["mp4"]
    # .decode() in the pipeline may already have parsed the JSON; handle both cases
    metadata = sample["json"]
    if isinstance(metadata, (bytes, bytearray, str)):
        metadata = json.loads(metadata)
    vr = VideoReader(io.BytesIO(video_bytes), ctx=cpu(0))
    # Sample 16 evenly spaced frames across the clip
    indices = torch.linspace(0, len(vr) - 1, 16).long().tolist()
    frames = [vr[i].asnumpy() for i in indices]
    inputs = processor(frames, return_tensors="pt")
    label = label_map.get(metadata.get("action", "idle"), label_map["idle"])
    return inputs["pixel_values"].squeeze(0), torch.tensor(label)
# --- 4. Build streaming dataloader ---
dataset = (
    wds.WebDataset(urls[:-1], shardshuffle=True)  # reserve the last shard for evaluation
    .shuffle(500)
    .decode()
    .map(decode_sample)
    .batched(4)
)
loader = wds.WebLoader(dataset, batch_size=None, num_workers=4)
# --- 5. Fine-tune ---
model = VideoMAEForVideoClassification.from_pretrained(
    "MCG-NJU/videomae-base",
    num_labels=len(ACTIONS),
    ignore_mismatched_sizes=True,
)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
model.train()  # from_pretrained returns the model in eval mode
losses = []
for step, (pixel_values, labels) in enumerate(loader):
    outputs = model(pixel_values=pixel_values, labels=labels)
    outputs.loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    losses.append(outputs.loss.item())
    if step % 10 == 0:
        print(f"step {step} — loss: {outputs.loss.item():.4f}")
Visualize training
Loss curve
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 4))
plt.plot(losses, linewidth=0.8)
plt.xlabel("Step")
plt.ylabel("Loss")
plt.title("VideoMAE fine-tuning loss")
plt.tight_layout()
plt.savefig("training_loss.png", dpi=150)
plt.show()
Confusion matrix
Run inference on the held-out shard and plot predictions vs ground truth.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
model.eval()
all_preds, all_labels = [], []
eval_dataset = (
    wds.WebDataset(urls[-1:])  # the held-out last shard
    .decode()
    .map(decode_sample)
    .batched(4)
)
with torch.no_grad():
    for pixel_values, labels in wds.WebLoader(eval_dataset, batch_size=None):
        logits = model(pixel_values=pixel_values).logits
        preds = logits.argmax(dim=-1)
        all_preds.extend(preds.tolist())
        all_labels.extend(labels.tolist())
cm = confusion_matrix(all_labels, all_preds, labels=range(len(ACTIONS)))
disp = ConfusionMatrixDisplay(cm, display_labels=ACTIONS)
fig, ax = plt.subplots(figsize=(8, 8))
disp.plot(ax=ax, cmap="Blues", xticks_rotation=45)
plt.title("Action classification — confusion matrix")
plt.tight_layout()
plt.savefig("confusion_matrix.png", dpi=150)
plt.show()
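The same predictions can also be summarized as per-class precision and recall, which is easier to compare across runs than a matrix image. A short sketch using scikit-learn's classification_report on the lists collected above:
from sklearn.metrics import classification_report

print(classification_report(
    all_labels,
    all_preds,
    labels=list(range(len(ACTIONS))),
    target_names=ACTIONS,
    zero_division=0,
))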
Sample predictions
import matplotlib.pyplot as plt
model.eval()
samples = wds.WebDataset(urls[-1:]).decode()
fig, axes = plt.subplots(2, 4, figsize=(16, 6))
for idx, sample in enumerate(samples):
    if idx >= 4:
        break
    pixel_values, label = decode_sample(sample)
    with torch.no_grad():
        logits = model(pixel_values=pixel_values.unsqueeze(0)).logits
    pred = logits.argmax(dim=-1).item()
    vr = VideoReader(io.BytesIO(sample["mp4"]), ctx=cpu(0))
    axes[0, idx].imshow(vr[0].asnumpy())
    axes[0, idx].set_title(f"true: {ACTIONS[label]}", fontsize=9)
    axes[0, idx].axis("off")
    axes[1, idx].imshow(vr[len(vr) // 2].asnumpy())
    color = "green" if pred == label else "red"
    axes[1, idx].set_title(f"pred: {ACTIONS[pred]}", fontsize=9, color=color)
    axes[1, idx].axis("off")
plt.suptitle("Sample predictions", fontsize=13)
plt.tight_layout()
plt.savefig("sample_predictions.png", dpi=150)
plt.show()
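To close the loop, the fine-tuned model can classify a single clip fetched straight from the API, reusing client, processor, model, and ACTIONS from the sections above. A minimal sketch, assuming requests is installed and CLIP_ID is replaced with a real clip ID:
import io
import requests
import torch
from decord import VideoReader, cpu

def classify_clip(clip_id):
    # Download the clip via its signed URL and sample 16 evenly spaced frames
    video = client.clips.video(clip_id)
    clip_bytes = requests.get(video.signed_url, timeout=60).content
    vr = VideoReader(io.BytesIO(clip_bytes), ctx=cpu(0))
    indices = torch.linspace(0, len(vr) - 1, 16).long().tolist()
    frames = [vr[i].asnumpy() for i in indices]
    inputs = processor(frames, return_tensors="pt")
    with torch.no_grad():
        logits = model(pixel_values=inputs["pixel_values"]).logits
    return ACTIONS[logits.argmax(dim=-1).item()]

print(classify_clip("CLIP_ID"))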