import timm
import torch
from PIL import Image
from torchvision import transforms
import requests
from io import BytesIO


def load_model():
    """Load the pre-trained model."""
    model = timm.create_model(
        "hf_hub:timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k_inat21",
        pretrained=True,
    )
    model.eval()
    return model


def get_label_names():
    """Fetch the class labels from the Hugging Face Hub."""
    config_url = "https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k_inat21/resolve/main/config.json"
    response = requests.get(config_url)
    response.raise_for_status()
    config = response.json()
    return config["label_names"]


def preprocess_image(image_url):
    """Fetch and preprocess the image."""
    preprocess = transforms.Compose([
        transforms.Resize(336),
        transforms.CenterCrop(336),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    response = requests.get(image_url)
    response.raise_for_status()
    # Convert to RGB so grayscale or RGBA downloads don't break the 3-channel normalization.
    image = Image.open(BytesIO(response.content)).convert("RGB")
    input_tensor = preprocess(image).unsqueeze(0)  # add a batch dimension
    return input_tensor


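# Note: the mean/std hard-coded above are the standard ImageNet values. For timm
# models it is often safer to derive the transform from the model's own pretrained
# data config, as sketched below. This helper is an optional alternative added for
# illustration (the function name is ours), not part of the original script.
def preprocess_image_from_model(model, image_url):
    """Alternative: build the transform from the model's pretrained data config."""
    data_config = timm.data.resolve_model_data_config(model)
    preprocess = timm.data.create_transform(**data_config, is_training=False)
    response = requests.get(image_url)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content)).convert("RGB")
    return preprocess(image).unsqueeze(0)

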
def predict_species(model, image_url, label_names):
    """Make a prediction using the model."""
    input_tensor = preprocess_image(image_url)

    # Run on GPU when available, otherwise fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    input_tensor = input_tensor.to(device)

    with torch.no_grad():
        output = model(input_tensor)
        _, predicted_class = torch.max(output, 1)

    predicted_species = label_names[predicted_class.item()]
    return predicted_species


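# Optional top-k variant, added for illustration (not part of the original script):
# softmax the logits and report the k most likely species with their probabilities.
def predict_top_k(model, image_url, label_names, k=5):
    """Return the k most likely (species, probability) pairs."""
    input_tensor = preprocess_image(image_url)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    input_tensor = input_tensor.to(device)
    with torch.no_grad():
        probs = torch.softmax(model(input_tensor), dim=1)
        top_probs, top_idx = probs.topk(k, dim=1)
    return [(label_names[i], p.item()) for i, p in zip(top_idx[0].tolist(), top_probs[0])]

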
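# Minimal usage sketch. The image URL below is a placeholder; substitute any publicly
# reachable photo of an organism covered by the iNat21 label set.
if __name__ == "__main__":
    model = load_model()
    label_names = get_label_names()
    image_url = "https://example.com/some_organism_photo.jpg"  # placeholder URL
    species = predict_species(model, image_url, label_names)
    print(f"Predicted species: {species}")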