Spaces:

ariG23498
/

video-classification-ucf101

Running on Zero

App Files Files Community

video-classification-ucf101 / app.py

ariG23498 HF Staff

Update app.py

ff9a584 verified 2 months ago

raw

history blame

2.11 kB

	import gradio as gr
	import spaces
	from torchvision.transforms import v2
	from torchcodec.decoders import VideoDecoder
	from torchcodec.samplers import clips_at_random_indices
	from transformers import VJEPA2VideoProcessor, VJEPA2ForVideoClassification
	import torch
	import torch.nn.functional as F
	import numpy as np

	# One-time startup work: the processor and the classification model are
	# created here so that every request reuses the same objects.
	MODEL_ID = "ariG23498/vjepa2-vitl-fpc16-256-ssv2-uvf101"
	processor = VJEPA2VideoProcessor.from_pretrained(MODEL_ID)
	model = VJEPA2ForVideoClassification.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")

	# Class names, in the iteration order of the model config's id -> label mapping.
	id2label = model.config.id2label
	labels = [*id2label.values()]

	@spaces.GPU  # ZeroGPU: request a GPU for the duration of each call
	def classify_video(video_path):
	    """Classify an uploaded video and return per-class probabilities.

	    Args:
	        video_path: Path of the video file supplied by the ``gr.Video``
	            input; falsy (e.g. ``None``) when nothing was uploaded.

	    Returns:
	        A dict mapping each class label to its softmax probability, which
	        is the shape of value the ``gr.Label`` output displays.

	    Raises:
	        gr.Error: If no video was supplied or the video has no frames.
	    """
	    if not video_path:
	        raise gr.Error("Please upload a video before submitting.")

	    # Decode and sample frames
	    decoder = VideoDecoder(video_path)
	    num_frames = len(decoder)
	    if num_frames == 0:
	        raise gr.Error("The uploaded video does not contain any frames.")

	    # Every second frame among the first `frames_per_clip` frames. A more
	    # elaborate strategy (e.g. the imported `clips_at_random_indices`
	    # sampler) can be substituted here.
	    frame_idx = np.arange(0, model.config.frames_per_clip, 2)
	    # Clamp to the last available frame so short videos repeat their final
	    # frame instead of failing with an out-of-range index; the number of
	    # sampled frames stays the same.
	    frame_idx = np.minimum(frame_idx, num_frames - 1).tolist()
	    clip = decoder.get_frames_at(indices=frame_idx).data

	    # Preprocess & move to the same device as the model
	    crop = v2.CenterCrop((processor.crop_size["height"], processor.crop_size["width"]))
	    inputs = processor(crop(clip), return_tensors="pt").to(model.device)

	    # Inference
	    model.eval()
	    with torch.no_grad():
	        logits = model(**inputs).logits[0]
	        # Softmax in float32 (the model dtype is chosen by "auto" and may be
	        # lower precision), then a single transfer to Python floats.
	        probs = F.softmax(logits.float(), dim=0).tolist()

	    # Look labels up by class index rather than relying on dict ordering.
	    return {id2label[idx]: prob for idx, prob in enumerate(probs)}

	# Gradio UI: one video input wired through `classify_video` to a label
	# widget that shows the returned class probabilities.
	demo = gr.Interface(
	    title="UCF101 Video Classifier",
	    description="Upload a video clip to get full softmax confidences over UCF101 classes.",
	    fn=classify_video,
	    inputs=gr.Video(label="Upload Video"),
	    outputs=gr.Label(label="Class Probabilities"),
	    examples=["baby_crawling.mp4"],
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()