---
# Torchvision classification model registry.
# Each top-level key names a model; its value holds ImageNet metrics, the
# torchvision constructor (model_fn), the expected input resolution, and the
# pretrained-weights enum to load.
# For more details on models, see https://pytorch.org/vision/main/models.html

# EfficientNet models: Designed for efficiency with compound scaling of depth, width, and resolution.
# These models balance accuracy and computational efficiency, ideal for resource-constrained environments.
efficientnet_b0:
  metrics:
    Acc@1: 77.692  # Top-1 accuracy on ImageNet
    Acc@5: 93.532  # Top-5 accuracy on ImageNet
    GFLOPS: 0.39  # Computational complexity
    Params: 5.3M  # Number of parameters
  model_fn: models.efficientnet_b0
  resolution: 224  # Input image resolution
  weights: models.EfficientNet_B0_Weights.IMAGENET1K_V1  # Pretrained weights on ImageNet

efficientnet_b1:
  metrics:
    Acc@1: 78.642
    Acc@5: 94.186
    GFLOPS: 0.69
    Params: 7.8M
  model_fn: models.efficientnet_b1
  resolution: 240
  weights: models.EfficientNet_B1_Weights.IMAGENET1K_V1

efficientnet_b2:
  metrics:
    Acc@1: 80.608
    Acc@5: 95.31
    GFLOPS: 1.09
    Params: 9.1M
  model_fn: models.efficientnet_b2
  resolution: 260
  weights: models.EfficientNet_B2_Weights.IMAGENET1K_V1

efficientnet_b3:
  metrics:
    Acc@1: 82.008
    Acc@5: 96.054
    GFLOPS: 1.83
    Params: 12.2M
  model_fn: models.efficientnet_b3
  resolution: 300
  weights: models.EfficientNet_B3_Weights.IMAGENET1K_V1

efficientnet_b4:
  metrics:
    Acc@1: 83.384
    Acc@5: 96.594
    GFLOPS: 4.39
    Params: 19.3M
  model_fn: models.efficientnet_b4
  resolution: 380
  weights: models.EfficientNet_B4_Weights.IMAGENET1K_V1

efficientnet_b5:
  metrics:
    Acc@1: 83.444
    Acc@5: 96.628
    GFLOPS: 10.27
    Params: 30.4M
  model_fn: models.efficientnet_b5
  resolution: 456
  weights: models.EfficientNet_B5_Weights.IMAGENET1K_V1

efficientnet_b6:
  metrics:
    Acc@1: 84.008
    Acc@5: 96.916
    GFLOPS: 19.07
    Params: 43.0M
  model_fn: models.efficientnet_b6
  resolution: 528
  weights: models.EfficientNet_B6_Weights.IMAGENET1K_V1

efficientnet_b7:
  metrics:
    Acc@1: 84.122
    Acc@5: 96.908
    GFLOPS: 37.75
    Params: 66.3M
  model_fn: models.efficientnet_b7
  resolution: 600
  weights: models.EfficientNet_B7_Weights.IMAGENET1K_V1

# EfficientNet V2 models: Improved training efficiency and performance over V1.
# These models use progressive learning and optimized scaling for better accuracy.
efficientnet_v2_l:
  metrics:
    Acc@1: 85.808
    Acc@5: 97.788
    GFLOPS: 56.08
    Params: 118.5M
  model_fn: models.efficientnet_v2_l
  resolution: 480
  weights: models.EfficientNet_V2_L_Weights.IMAGENET1K_V1

efficientnet_v2_m:
  metrics:
    Acc@1: 85.112
    Acc@5: 97.156
    GFLOPS: 24.58
    Params: 54.1M
  model_fn: models.efficientnet_v2_m
  resolution: 480
  weights: models.EfficientNet_V2_M_Weights.IMAGENET1K_V1

efficientnet_v2_s:
  metrics:
    Acc@1: 84.228
    Acc@5: 96.878
    GFLOPS: 8.37
    Params: 21.5M
  model_fn: models.efficientnet_v2_s
  resolution: 384
  weights: models.EfficientNet_V2_S_Weights.IMAGENET1K_V1

# RegNet models: Designed for scalability and efficiency with a focus on network design.
# These models optimize for both accuracy and computational efficiency.
regnet_y_128gf:
  metrics:
    Acc@1: 86.068  # High accuracy but computationally expensive
    Acc@5: 97.844
    GFLOPS: 127.52
    Params: 644.8M
  model_fn: models.regnet_y_128gf
  resolution: 224
  weights: models.RegNet_Y_128GF_Weights.IMAGENET1K_SWAG_LINEAR_V1

regnet_y_16gf:
  metrics:
    Acc@1: 82.886
    Acc@5: 96.328
    GFLOPS: 15.91
    Params: 83.6M
  model_fn: models.regnet_y_16gf
  resolution: 224
  weights: models.RegNet_Y_16GF_Weights.IMAGENET1K_V2

regnet_y_1_6gf:
  metrics:
    Acc@1: 80.876
    Acc@5: 95.444
    GFLOPS: 1.61
    Params: 11.2M
  model_fn: models.regnet_y_1_6gf
  resolution: 224
  weights: models.RegNet_Y_1_6GF_Weights.IMAGENET1K_V2

regnet_y_32gf:
  metrics:
    Acc@1: 83.368
    Acc@5: 96.498
    GFLOPS: 32.28
    Params: 145.0M
  model_fn: models.regnet_y_32gf
  resolution: 224
  weights: models.RegNet_Y_32GF_Weights.IMAGENET1K_V2

regnet_y_3_2gf:
  metrics:
    Acc@1: 81.982
    Acc@5: 95.972
    GFLOPS: 3.18
    Params: 19.4M
  model_fn: models.regnet_y_3_2gf
  resolution: 224
  weights: models.RegNet_Y_3_2GF_Weights.IMAGENET1K_V2

regnet_y_400mf:
  metrics:
    Acc@1: 75.804
    Acc@5: 92.742
    GFLOPS: 0.4
    Params: 4.3M
  model_fn: models.regnet_y_400mf
  resolution: 224
  weights: models.RegNet_Y_400MF_Weights.IMAGENET1K_V2

regnet_y_800mf:
  metrics:
    Acc@1: 78.828
    Acc@5: 94.502
    GFLOPS: 0.83
    Params: 6.4M
  model_fn: models.regnet_y_800mf
  resolution: 224
  weights: models.RegNet_Y_800MF_Weights.IMAGENET1K_V2

regnet_y_8gf:
  metrics:
    Acc@1: 82.828
    Acc@5: 96.33
    GFLOPS: 8.47
    Params: 39.4M
  model_fn: models.regnet_y_8gf
  resolution: 224
  weights: models.RegNet_Y_8GF_Weights.IMAGENET1K_V2

# Vision Transformer (ViT) models: Transformer-based architecture for image classification.
# These models excel in capturing long-range dependencies but require significant compute for larger variants.
vit_b_16:
  metrics:
    Acc@1: 81.072  # Base ViT model with balanced accuracy and efficiency
    Acc@5: 95.318
    GFLOPS: 17.56
    Params: 86.6M
  model_fn: models.vit_b_16
  resolution: 224
  weights: models.ViT_B_16_Weights.IMAGENET1K_V1

vit_b_32:
  metrics:
    Acc@1: 75.912  # Larger patch size version of ViT, lower accuracy but fewer computations
    Acc@5: 92.466
    GFLOPS: 4.41
    Params: 88.2M
  model_fn: models.vit_b_32
  resolution: 224
  weights: models.ViT_B_32_Weights.IMAGENET1K_V1

vit_h_14:
  metrics:
    Acc@1: 88.552  # High-performance ViT model with very high accuracy and computational cost
    Acc@5: 98.694
    GFLOPS: 1016.72
    Params: 633.5M
  model_fn: models.vit_h_14
  resolution: 224
  weights: models.ViT_H_14_Weights.IMAGENET1K_SWAG_E2E_V1

vit_l_16:
  metrics:
    Acc@1: 79.662  # Larger ViT model with improved accuracy over base models
    Acc@5: 94.638
    GFLOPS: 61.55
    Params: 304.3M
  model_fn: models.vit_l_16
  resolution: 224
  weights: models.ViT_L_16_Weights.IMAGENET1K_V1

vit_l_32:
  metrics:
    Acc@1: 76.972  # Larger ViT with larger patch size, trading accuracy for reduced compute
    Acc@5: 93.07
    GFLOPS: 15.38
    Params: 306.5M
  model_fn: models.vit_l_32
  resolution: 224
  weights: models.ViT_L_32_Weights.IMAGENET1K_V1