# Spaces: Running on Zero
import gradio as gr | |
import spaces | |
from torchvision.transforms import v2 | |
from torchcodec.decoders import VideoDecoder | |
from torchcodec.samplers import clips_at_random_indices | |
from transformers import VJEPA2VideoProcessor, VJEPA2ForVideoClassification | |
import torch | |
import torch.nn.functional as F | |
import numpy as np | |
# Load model and processor once at startup so every request reuses them
MODEL_ID = "ariG23498/vjepa2-vitl-fpc16-256-ssv2-uvf101"
processor = VJEPA2VideoProcessor.from_pretrained(MODEL_ID)
model = VJEPA2ForVideoClassification.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
id2label = model.config.id2label
# Order the label names by class id so that labels[i] names logit i,
# instead of relying on the insertion order of the id2label mapping.
labels = [id2label[class_id] for class_id in sorted(id2label, key=int)]
@spaces.GPU  # run on GPU zero: ZeroGPU attaches a GPU only inside decorated functions
def classify_video(video_path):
    """Classify a video and return a probability for every known label.

    Args:
        video_path: Path of the video file to classify. A falsy value
            (e.g. ``None`` when nothing was uploaded) is rejected.

    Returns:
        A dict mapping each entry of ``labels`` to its softmax probability
        as a Python float.

    Raises:
        gr.Error: If no video was provided or the video has no frames.
    """
    if not video_path:
        raise gr.Error("Please upload a video before submitting.")

    # Decode and sample frames
    decoder = VideoDecoder(video_path)
    num_frames = len(decoder)
    if num_frames == 0:
        raise gr.Error("The uploaded video contains no decodable frames.")

    # Every second frame from the start of the video. A more complex
    # sampling strategy (e.g. torchcodec's clips_at_random_indices) can be
    # substituted here.
    frame_idx = np.arange(0, model.config.frames_per_clip, 2)
    # Clamp so that videos shorter than the sampling window repeat their
    # last frame instead of requesting frames past the end.
    frame_idx = np.minimum(frame_idx, num_frames - 1)
    clip = decoder.get_frames_at(indices=frame_idx.tolist()).data

    # Preprocess & move to the same device as the model
    crop = v2.CenterCrop(
        (processor.crop_size["height"], processor.crop_size["width"])
    )
    inputs = processor(crop(clip), return_tensors="pt").to(model.device)

    # Inference
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits[0]

    # Softmax in float32 (the model dtype is chosen automatically and may be
    # reduced precision), then a single transfer to Python floats.
    probs = F.softmax(logits.float(), dim=0).tolist()

    # Build and return the label→confidence dict
    return dict(zip(labels, probs))
# Assemble the Gradio UI; the server is started only when run as a script
_video_input = gr.Video(label="Upload Video")
_label_output = gr.Label(label="Class Probabilities")

demo = gr.Interface(
    classify_video,
    _video_input,
    _label_output,
    examples=["baby_crawling.mp4"],
    title="UCF101 Video Classifier",
    description=(
        "Upload a video clip to get full softmax confidences over UCF101 classes."
    ),
)

if __name__ == "__main__":
    demo.launch()