""" ResNet18 AI Image Detector Model Hugging Face compatible implementation """ import torch import torch.nn as nn from typing import Optional, Dict, Any from transformers import PreTrainedModel from transformers.modeling_outputs import ImageClassifierOutput from torchvision.models import resnet18, ResNet18_Weights from .config import ResNet18DetectorConfig class ResNet18Detector(PreTrainedModel): """ ResNet18 based AI Image Detector compatible with Hugging Face This model detects whether an input image is AI-generated or real. """ config_class = ResNet18DetectorConfig def __init__(self, config: ResNet18DetectorConfig): super().__init__(config) self.num_labels = config.num_classes self.config = config # Initialize ResNet18 backbone weights = getattr(ResNet18_Weights, config.pretrained_weights, ResNet18_Weights.IMAGENET1K_V1) self.backbone = resnet18(weights=weights) # Replace the final fully connected layer in_features = self.backbone.fc.in_features self.backbone.fc = nn.Sequential( nn.Dropout(config.dropout_rate), nn.Linear(in_features, config.num_classes) ) # Freeze backbone if specified if config.freeze_backbone: for param in self.backbone.parameters(): param.requires_grad = False # Keep the final layer trainable for param in self.backbone.fc.parameters(): param.requires_grad = True def forward( self, pixel_values: Optional[torch.Tensor] = None, labels: Optional[torch.Tensor] = None, return_dict: Optional[bool] = None, **kwargs ) -> ImageClassifierOutput: """ Forward pass of the model Args: pixel_values: Input images tensor of shape (batch_size, 3, 224, 224) labels: Ground truth labels for computing loss return_dict: Whether to return ModelOutput instead of tuple Returns: ImageClassifierOutput containing loss and logits """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict if pixel_values is None: raise ValueError("pixel_values must be provided") # Forward pass through ResNet logits = self.backbone(pixel_values) loss = None if labels is not None: # Compute cross-entropy loss loss_fct = nn.CrossEntropyLoss() loss = loss_fct(logits, labels) if not return_dict: output = (logits,) return ((loss,) + output) if loss is not None else output return ImageClassifierOutput( loss=loss, logits=logits, ) def extract_features(self, pixel_values: torch.Tensor) -> torch.Tensor: """ Extract features before the final classification layer Args: pixel_values: Input images tensor Returns: Feature tensor from the backbone """ # Forward through all layers except the final FC x = self.backbone.conv1(pixel_values) x = self.backbone.bn1(x) x = self.backbone.relu(x) x = self.backbone.maxpool(x) x = self.backbone.layer1(x) x = self.backbone.layer2(x) x = self.backbone.layer3(x) x = self.backbone.layer4(x) x = self.backbone.avgpool(x) features = torch.flatten(x, 1) return features def predict(self, pixel_values: torch.Tensor) -> Dict[str, Any]: """ Make predictions with confidence scores Args: pixel_values: Input images tensor Returns: Dictionary with predictions and confidence scores """ self.eval() with torch.no_grad(): outputs = self.forward(pixel_values) logits = outputs.logits probs = torch.softmax(logits, dim=-1) predictions = torch.argmax(probs, dim=-1) # Convert to readable labels labels = ["Real", "AI-generated"] results = [] for i in range(len(predictions)): pred_idx = predictions[i].item() confidence = probs[i, pred_idx].item() results.append({ "label": labels[pred_idx], "confidence": confidence, "probabilities": { "Real": probs[i, 0].item(), "AI-generated": probs[i, 1].item() } }) return results if len(results) > 1 else results[0] # Register the model for auto loading ResNet18DetectorConfig.register_for_auto_class() ResNet18Detector.register_for_auto_class("AutoModelForImageClassification")