---
# Hugging Face page header preserved from the original upload:
# SlimFace-demo / configs / image_classification_models_config.yaml
# Uploaded by danhtran2mind — "Upload 164 files" — commit b7f710c (verified)
# For more details on models, see https://pytorch.org/vision/main/models.html
# EfficientNet models: Designed for efficiency with compound scaling of depth, width, and resolution.
# These models balance accuracy and computational efficiency, ideal for resource-constrained environments.
# Each entry maps a torchvision model name to its reference ImageNet metrics,
# constructor (model_fn), required input resolution, and pretrained weights enum.
efficientnet_b0:
  metrics:
    Acc@1: 77.692  # Top-1 accuracy on ImageNet
    Acc@5: 93.532  # Top-5 accuracy on ImageNet
    GFLOPS: 0.39  # Computational complexity
    Params: 5.3M  # Number of parameters
  model_fn: models.efficientnet_b0
  resolution: 224  # Input image resolution
  weights: models.EfficientNet_B0_Weights.IMAGENET1K_V1  # Pretrained weights on ImageNet
efficientnet_b1:
  metrics:
    Acc@1: 78.642
    Acc@5: 94.186
    GFLOPS: 0.69
    Params: 7.8M
  model_fn: models.efficientnet_b1
  resolution: 240
  weights: models.EfficientNet_B1_Weights.IMAGENET1K_V1
efficientnet_b2:
  metrics:
    Acc@1: 80.608
    Acc@5: 95.31
    GFLOPS: 1.09
    Params: 9.1M
  model_fn: models.efficientnet_b2
  resolution: 260
  weights: models.EfficientNet_B2_Weights.IMAGENET1K_V1
efficientnet_b3:
  metrics:
    Acc@1: 82.008
    Acc@5: 96.054
    GFLOPS: 1.83
    Params: 12.2M
  model_fn: models.efficientnet_b3
  resolution: 300
  weights: models.EfficientNet_B3_Weights.IMAGENET1K_V1
efficientnet_b4:
  metrics:
    Acc@1: 83.384
    Acc@5: 96.594
    GFLOPS: 4.39
    Params: 19.3M
  model_fn: models.efficientnet_b4
  resolution: 380
  weights: models.EfficientNet_B4_Weights.IMAGENET1K_V1
efficientnet_b5:
  metrics:
    Acc@1: 83.444
    Acc@5: 96.628
    GFLOPS: 10.27
    Params: 30.4M
  model_fn: models.efficientnet_b5
  resolution: 456
  weights: models.EfficientNet_B5_Weights.IMAGENET1K_V1
efficientnet_b6:
  metrics:
    Acc@1: 84.008
    Acc@5: 96.916
    GFLOPS: 19.07
    Params: 43.0M
  model_fn: models.efficientnet_b6
  resolution: 528
  weights: models.EfficientNet_B6_Weights.IMAGENET1K_V1
efficientnet_b7:
  metrics:
    Acc@1: 84.122
    Acc@5: 96.908
    GFLOPS: 37.75
    Params: 66.3M
  model_fn: models.efficientnet_b7
  resolution: 600
  weights: models.EfficientNet_B7_Weights.IMAGENET1K_V1
# EfficientNet V2 models: Improved training efficiency and performance over V1.
# These models use progressive learning and optimized scaling for better accuracy.
efficientnet_v2_l:
  metrics:
    Acc@1: 85.808
    Acc@5: 97.788
    GFLOPS: 56.08
    Params: 118.5M
  model_fn: models.efficientnet_v2_l
  resolution: 480
  weights: models.EfficientNet_V2_L_Weights.IMAGENET1K_V1
efficientnet_v2_m:
  metrics:
    Acc@1: 85.112
    Acc@5: 97.156
    GFLOPS: 24.58
    Params: 54.1M
  model_fn: models.efficientnet_v2_m
  resolution: 480
  weights: models.EfficientNet_V2_M_Weights.IMAGENET1K_V1
efficientnet_v2_s:
  metrics:
    Acc@1: 84.228
    Acc@5: 96.878
    GFLOPS: 8.37
    Params: 21.5M
  model_fn: models.efficientnet_v2_s
  resolution: 384
  weights: models.EfficientNet_V2_S_Weights.IMAGENET1K_V1
# RegNet models: Designed for scalability and efficiency with a focus on network design.
# These models optimize for both accuracy and computational efficiency.
regnet_y_128gf:
  metrics:
    Acc@1: 86.068  # High accuracy but computationally expensive
    Acc@5: 97.844
    GFLOPS: 127.52
    Params: 644.8M
  model_fn: models.regnet_y_128gf
  resolution: 224
  # NOTE: unlike the other RegNet entries below (IMAGENET1K_V2), this one uses
  # SWAG linear-probe weights, per the weights enum name.
  weights: models.RegNet_Y_128GF_Weights.IMAGENET1K_SWAG_LINEAR_V1
regnet_y_16gf:
  metrics:
    Acc@1: 82.886
    Acc@5: 96.328
    GFLOPS: 15.91
    Params: 83.6M
  model_fn: models.regnet_y_16gf
  resolution: 224
  weights: models.RegNet_Y_16GF_Weights.IMAGENET1K_V2
regnet_y_1_6gf:
  metrics:
    Acc@1: 80.876
    Acc@5: 95.444
    GFLOPS: 1.61
    Params: 11.2M
  model_fn: models.regnet_y_1_6gf
  resolution: 224
  weights: models.RegNet_Y_1_6GF_Weights.IMAGENET1K_V2
regnet_y_32gf:
  metrics:
    Acc@1: 83.368
    Acc@5: 96.498
    GFLOPS: 32.28
    Params: 145.0M
  model_fn: models.regnet_y_32gf
  resolution: 224
  weights: models.RegNet_Y_32GF_Weights.IMAGENET1K_V2
regnet_y_3_2gf:
  metrics:
    Acc@1: 81.982
    Acc@5: 95.972
    GFLOPS: 3.18
    Params: 19.4M
  model_fn: models.regnet_y_3_2gf
  resolution: 224
  weights: models.RegNet_Y_3_2GF_Weights.IMAGENET1K_V2
regnet_y_400mf:
  metrics:
    Acc@1: 75.804
    Acc@5: 92.742
    GFLOPS: 0.4
    Params: 4.3M
  model_fn: models.regnet_y_400mf
  resolution: 224
  weights: models.RegNet_Y_400MF_Weights.IMAGENET1K_V2
regnet_y_800mf:
  metrics:
    Acc@1: 78.828
    Acc@5: 94.502
    GFLOPS: 0.83
    Params: 6.4M
  model_fn: models.regnet_y_800mf
  resolution: 224
  weights: models.RegNet_Y_800MF_Weights.IMAGENET1K_V2
regnet_y_8gf:
  metrics:
    Acc@1: 82.828
    Acc@5: 96.33
    GFLOPS: 8.47
    Params: 39.4M
  model_fn: models.regnet_y_8gf
  resolution: 224
  weights: models.RegNet_Y_8GF_Weights.IMAGENET1K_V2
# Vision Transformer (ViT) models: Transformer-based architecture for image classification.
# These models excel in capturing long-range dependencies but require significant compute for larger variants.
vit_b_16:
  metrics:
    Acc@1: 81.072  # Base ViT model with balanced accuracy and efficiency
    Acc@5: 95.318
    GFLOPS: 17.56
    Params: 86.6M
  model_fn: models.vit_b_16
  resolution: 224
  weights: models.ViT_B_16_Weights.IMAGENET1K_V1
vit_b_32:
  metrics:
    Acc@1: 75.912  # Larger patch size (32 vs 16): lower accuracy but far fewer computations
    Acc@5: 92.466
    GFLOPS: 4.41
    Params: 88.2M
  model_fn: models.vit_b_32
  resolution: 224
  weights: models.ViT_B_32_Weights.IMAGENET1K_V1
vit_h_14:
  metrics:
    Acc@1: 88.552  # High-performance ViT model with very high accuracy and computational cost
    Acc@5: 98.694
    GFLOPS: 1016.72
    Params: 633.5M
  model_fn: models.vit_h_14
  resolution: 224
  # SWAG end-to-end fine-tuned weights, per the weights enum name (other ViT
  # entries here use IMAGENET1K_V1).
  weights: models.ViT_H_14_Weights.IMAGENET1K_SWAG_E2E_V1
vit_l_16:
  metrics:
    Acc@1: 79.662  # Larger ViT; note these V1 weights score below vit_b_16's listed Acc@1
    Acc@5: 94.638
    GFLOPS: 61.55
    Params: 304.3M
  model_fn: models.vit_l_16
  resolution: 224
  weights: models.ViT_L_16_Weights.IMAGENET1K_V1
vit_l_32:
  metrics:
    Acc@1: 76.972  # Larger ViT with larger patch size, trading accuracy for reduced compute
    Acc@5: 93.07
    GFLOPS: 15.38
    Params: 306.5M
  model_fn: models.vit_l_32
  resolution: 224
  weights: models.ViT_L_32_Weights.IMAGENET1K_V1