AndreCosta committed on
Commit 7b615ae · 1 Parent(s): b3932dd

Initial clean commit with LFS configured

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete set of changes.
Files changed (50)
  1. .gitattributes +3 -0
  2. .huggingface/model-index.yaml +32 -0
  3. .project +17 -0
  4. .pydevproject +11 -0
  5. .settings/org.eclipse.core.resources.prefs +6 -0
  6. CHANGELOG.md +481 -0
  7. DataSet/ExtraTests/29bb3ece3180_11.jpg +3 -0
  8. DataSet/annotations/classes.txt +2 -0
  9. DataSet/images/0cdf5b5d0ce1_01.jpg +3 -0
  10. DataSet/masks/0cdf5b5d0ce1_01_mask.png +3 -0
  11. LICENSE +20 -0
  12. README.md +345 -3
  13. __init__.py +0 -0
  14. bad_image.png +3 -0
  15. checkpoints/best_model.pt +3 -0
  16. dice_history.png +3 -0
  17. good_image.png +3 -0
  18. iou_history.png +3 -0
  19. model_card.md +122 -0
  20. report_file.txt +56 -0
  21. requirements.txt +26 -0
  22. run_app.py +9 -0
  23. run_evaluate.py +9 -0
  24. scripts/Dataset/ConvertFormat.py +32 -0
  25. scripts/Dataset/Rename.py +22 -0
  26. scripts/Dataset/Resize.py +25 -0
  27. scripts/Dataset/TrainVal.py +35 -0
  28. scripts/Dataset/__init__.py +0 -0
  29. scripts/Dataset/dataAugmentation.py +48 -0
  30. scripts/Dataset/deleteDuplicates.py +32 -0
  31. scripts/Dataset/getDS_HuggingFace.py +17 -0
  32. scripts/Dataset/getImages.py +17 -0
  33. scripts/Dataset/grays.py +23 -0
  34. scripts/Dataset/mask_diagnosis.py +8 -0
  35. scripts/Dataset/masks.py +47 -0
  36. scripts/Dataset/validMasks.py +45 -0
  37. scripts/Segmentation/Future/__init__.py +0 -0
  38. scripts/Segmentation/Future/cyber_train.py +299 -0
  39. scripts/Segmentation/Future/train_embedded_explicit_model.py +126 -0
  40. scripts/Segmentation/__init__.py +0 -0
  41. scripts/Segmentation/app.py +59 -0
  42. scripts/Segmentation/augment.py +30 -0
  43. scripts/Segmentation/diceLossCriterion.py +22 -0
  44. scripts/Segmentation/evaluate_model.py +93 -0
  45. scripts/Segmentation/focalLoss.py +15 -0
  46. scripts/Segmentation/models.py +78 -0
  47. scripts/Segmentation/segDS.py +42 -0
  48. scripts/Segmentation/train.py +250 -0
  49. scripts/__init__.py +0 -0
  50. scripts/config.py +25 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.jpg filter=lfs diff=lfs merge=lfs -text
38
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
.huggingface/model-index.yaml ADDED
@@ -0,0 +1,32 @@
1
+ # .huggingface/model-index.yaml
2
+ model-index:
3
+ - name: ResNet-UNet Segmenter
4
+ results:
5
+ - task:
6
+ type: image-segmentation
7
+ name: Image Segmentation
8
+ dataset:
9
+ name: Carvana Subset (Indoor Controlled)
10
+ type: image-segmentation
11
+ metrics:
12
+ - type: iou
13
+ value: 0.994
14
+ - type: dice
15
+ value: 0.996
16
+ metadata:
17
+ library_name: pytorch
18
+ tags:
19
+ - image-segmentation
20
+ - unet
21
+ - resnet
22
+ - computer-vision
23
+ - binary-segmentation
24
+ - grayscale
25
+ license: mit
26
+ framework: pytorch
27
+ task:
28
+ type: image-segmentation
29
+ name: Image Segmentation
30
+ datasets:
31
+ - name: CV Image Segmentation (Carvana subset)
32
+ type: image-segmentation
.project ADDED
@@ -0,0 +1,17 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <projectDescription>
3
+ <name>Hooging_Face_CV_certification</name>
4
+ <comment></comment>
5
+ <projects>
6
+ </projects>
7
+ <buildSpec>
8
+ <buildCommand>
9
+ <name>org.python.pydev.PyDevBuilder</name>
10
+ <arguments>
11
+ </arguments>
12
+ </buildCommand>
13
+ </buildSpec>
14
+ <natures>
15
+ <nature>org.python.pydev.pythonNature</nature>
16
+ </natures>
17
+ </projectDescription>
.pydevproject ADDED
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <?eclipse-pydev version="1.0"?><pydev_project>
3
+
4
+ <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
5
+
6
+ <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python interpreter</pydev_property>
7
+
8
+ <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
9
+ <path>/${PROJECT_DIR_NAME}</path>
10
+ </pydev_pathproperty>
11
+ </pydev_project>
.settings/org.eclipse.core.resources.prefs ADDED
@@ -0,0 +1,6 @@
1
+ eclipse.preferences.version=1
2
+ encoding//scripts/Dataset/ConvertFormat.py=utf8
3
+ encoding//scripts/Dataset/Rename.py=utf8
4
+ encoding//scripts/Dataset/Resize.py=utf8
5
+ encoding//scripts/Dataset/TrainVal.py=utf8
6
+ encoding//scripts/Dataset/grays.py=utf8
CHANGELOG.md ADDED
@@ -0,0 +1,481 @@
1
+ # 📌 Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+ This file follows the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format and semantic versioning.
5
+
6
+ ## [v0.15.0]
7
+ - 2025-07-19 - Version Analysis
8
+
9
+ Adjustments made to the model evaluation scripts ("evaluate_model.py" and "app.py") so they work with the model's new architecture.
10
+
11
+ 🟠 [Analysis] Overfitting detected in the latest training sessions
12
+ During the assessment of saved checkpoints (especially at later epochs), progressive signs of overfitting were observed. The model began to mark regions outside the object of interest, including unwanted noise and leftover blobs in the segmentation - behavior not observed during the initial training epochs.
13
+
14
+ This effect was progressive: the greater the number of epochs, the more the model "hallucinates" regions, extrapolating beyond the real boundaries of the expected mask.
15
+
16
+ 📌 Implications:
17
+
18
+ Clear signal that the model is memorizing training-set patterns, including irrelevant visual artifacts.
19
+
20
+ It can compromise the model's generalization to real environments or unseen images.
21
+
22
+
23
+ - 2025-07-18
24
+ ✅ Adjustments to the loss and TTA pipeline
25
+ Fixed the DiceLoss function
26
+
27
+ Added shape checking and compatibility between preds and targets to avoid broadcast errors.
28
+
29
+ Included safe conversion to float after binary comparison for class 1 masks.
30
+
31
+ Ensures that targets are correctly reduced if they have dimensions [B, 1, H, W].
32
+
33
+ Modified predict_with_tta to return logits
34
+
35
+ Created the predict_with_tta_logits function, which returns the mean of the logits before argmax, allowing direct use with loss functions such as CrossEntropyLoss and DiceLoss.
36
+
37
+ Maintained compatibility with the TTA structure, but now without compromising the backward compatibility of the training pipeline.
38
+
39
+ Standardization of outputs for use in loss analysis
40
+
41
+ Training adapted to always deliver logit tensors [B, C, H, W] for the loss criterion, regardless of the use of TTA.
42
+
43
+
44
+
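+ A minimal sketch of the two adjustments above (a shape-safe Dice term and logit-averaging TTA), assuming two-class logits of shape [B, C, H, W] and integer masks; the project's actual implementations live in diceLossCriterion.py and augment.py:
+
+ ```python
+ import torch
+
+ def dice_loss(logits, targets, smooth=1e-6):
+     # logits: [B, C, H, W]; targets: [B, H, W] or [B, 1, H, W] with values {0, 1}
+     if targets.dim() == 4 and targets.size(1) == 1:
+         targets = targets.squeeze(1)                 # reduce [B, 1, H, W] -> [B, H, W]
+     probs = torch.softmax(logits, dim=1)[:, 1]       # probability of class 1
+     targets = (targets == 1).float()                 # safe float conversion after binary comparison
+     inter = (probs * targets).sum(dim=(1, 2))
+     union = probs.sum(dim=(1, 2)) + targets.sum(dim=(1, 2))
+     return 1.0 - ((2.0 * inter + smooth) / (union + smooth)).mean()
+
+ def predict_with_tta_logits(model, images):
+     # Average logits (before any argmax) over a horizontal flip, so the result
+     # can still be fed to CrossEntropyLoss / DiceLoss.
+     logits = model(images)
+     flipped = torch.flip(model(torch.flip(images, dims=[-1])), dims=[-1])
+     return (logits + flipped) / 2.0
+ ```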
45
+ ## [v0.14.0] – 2025-07-17 – Conditional Control via Settings
46
+ Improvements and Additions:
47
+
48
+ 🔧 Added configurable flags at the beginning of the script (USE_TTA, USE_REFINEMENT, USE_FOCAL_LOSS, GAMMA_FOCAL) to enable/disable advanced behaviors in the pipeline in a simple and controlled manner.
49
+
50
+ ✅ Automatic loss function selection based on the flag:
51
+
52
+ CrossEntropyLoss by default.
53
+
54
+ FocalLoss with adjustable gamma (via GAMMA_FOCAL), enabled by flag.
55
+
56
+ 🔁 Test-Time Augmentation (TTA) now optionally applicable in the inference phase:
57
+
58
+ Includes flips and rotations with automatic inversion.
59
+
60
+ Final prediction by averaging probability maps.
61
+
62
+ 🧼 Mask smoothing via morphological closing/opening with cv2, also controllable by flag.
63
+
64
+ Applies morphological refinement to smooth contours and reduce jagged edges.
65
+
66
+ Motivation:
67
+ Allows modular experimentation, with a direct impact on validation metrics (IoU/Dice) without altering the core model or recoding sections. Flexibility is essential for controlled experimentation in the R&D cycle.
68
+
69
+
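+ A sketch of how such flags can drive the pipeline; the flag names come from this entry, the FocalLoss class is the one defined in focalLoss.py (injected here only to keep the sketch self-contained), and the kernel size is an illustrative assumption:
+
+ ```python
+ import cv2
+ import numpy as np
+ import torch.nn as nn
+
+ USE_FOCAL_LOSS = False   # flag values are illustrative
+ GAMMA_FOCAL = 2.0
+ USE_REFINEMENT = True
+
+ def build_criterion(focal_loss_cls=None):
+     # Select the loss function from the flag; focal_loss_cls is the project's FocalLoss.
+     if USE_FOCAL_LOSS and focal_loss_cls is not None:
+         return focal_loss_cls(gamma=GAMMA_FOCAL)
+     return nn.CrossEntropyLoss()
+
+ def refine_mask(mask, kernel_size=5):
+     # Morphological closing then opening to smooth contours and remove small specks.
+     if not USE_REFINEMENT:
+         return mask
+     kernel = np.ones((kernel_size, kernel_size), np.uint8)
+     closed = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
+     return cv2.morphologyEx(closed, cv2.MORPH_OPEN, kernel)
+ ```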
70
+ ## [v0.13.0] – 2025-07-16
71
+ ⚙️ **Practical Training Adjustments**
72
+ - **Removed the use of `GradualWarmupScheduler`** due to an import error and dependency conflicts — will be re-evaluated in the future.
73
+ - **Kept the new `CosineAnnealingLR`** scheduler, promoting a smooth variation in the learning rate across epochs.
74
+
75
+ 🧪 **Experimental Configuration**
76
+ - **Adjusted `early_stop_patience` to 40**, allowing the model greater exposure to the data before stopping training due to stagnation.
77
+
78
+ 💡 Notes
79
+ - Testing with 20,000 512×512 images caused GPU memory overhead — revised strategy for smaller batches and progressive adjustments.
80
+
81
+
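+ A minimal sketch of the scheduler setup kept in this version (the optimizer settings follow the v0.8.0 entry further down; the placeholder model stands in for the ResNet-UNet defined in models.py):
+
+ ```python
+ import torch
+ from torch import nn
+ from torch.optim.lr_scheduler import CosineAnnealingLR
+
+ model = nn.Conv2d(1, 2, kernel_size=3, padding=1)   # placeholder for the real model
+ num_epochs = 100                                     # illustrative value
+ optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
+ scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=1e-6)
+
+ for epoch in range(num_epochs):
+     # ... one epoch of training and validation would run here ...
+     scheduler.step()   # smooth cosine decay of the learning rate across epochs
+ ```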
82
+ ## [v0.12.0] – 2025-07-14
83
+
84
+ 🚀 New Phase: Consistent Dataset and High-Resolution Input
85
+
86
+ 🧼 Dataset Reconstruction
87
+ Dataset completely recreated from scratch, correcting critical flaws in image-to-mask matching.
88
+
89
+ Fixed a bug in the preprocessing script that could mix or swap masks between images.
90
+
91
+ Images and masks are now guaranteed to be aligned, with structural consistency and no label pollution.
92
+
93
+ 📏 Resolution Increase
94
+ Input resolution increased from 256×256 → 512×512, allowing for better definition of shapes and contours.
95
+
96
+ Architecture adjusted to support the new dimensions while maintaining U-Net flow with skip connections.
97
+
98
+ 🧠 Quality and Focus
99
+ Higher information density per image, promoting more refined learning.
100
+
101
+ Expected reduction of "coarse blocks" in predictions, with improvements in edges and spatial orientation.
102
+
103
+ 💡 Notes
104
+ This new stage marks the transition from the exploratory phase to a more mature, validated pipeline aligned with best practices for deep segmentation.
105
+
106
+ Future validations will include overlay visualizations, class-based metrics, and qualitative comparisons between versions.
107
+
108
+
109
+ 🎯 Segmentation and robustness refinements
110
+
111
+ 🧼 Correction of masks with invalid values
112
+ Detected critical error: some masks contained the value 2, causing failures in the metrics (ValueError: unseen labels).
113
+
114
+ Applied solution:
115
+
116
+ Converted masks to NumPy and binarized them with mask = (mask > 127).astype(np.uint8), ensuring only the values 0 and 1.
117
+
118
+ Subsequent conversion with torch.from_numpy(...).long() for use in the model.
119
+
120
+ Integrity validation with np.unique(), raising an explicit error if unexpected values are found.
121
+
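+ A minimal sketch of the binarization and integrity check described above (the project's own loading logic lives in segDS.py):
+
+ ```python
+ import numpy as np
+ import torch
+ from PIL import Image
+
+ def load_binary_mask(path):
+     mask = np.array(Image.open(path).convert("L"))
+     mask = (mask > 127).astype(np.uint8)          # binarize: only 0 and 1 remain
+     values = np.unique(mask)
+     if not set(values.tolist()) <= {0, 1}:        # explicit integrity check
+         raise ValueError(f"Unexpected mask values {values} in {path}")
+     return torch.from_numpy(mask).long()          # class indices for CrossEntropyLoss
+ ```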
122
+ 🧪 Pipeline diagnosis
123
+ Added temporary inspection of the masks' unique values during __getitem__() to identify out-of-spec data.
124
+
125
+ 📉 Correction of metrics
126
+ Adjusted the use of scikit-learn's jaccard_score for multiclass scenarios:
127
+
128
+ Replaced average='binary' with average='macro' to avoid the error ValueError: Target is multiclass ....
129
+
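+ A short sketch of the per-image metrics described above, assuming binary {0, 1} masks (the function name is illustrative):
+
+ ```python
+ import numpy as np
+ from sklearn.metrics import jaccard_score
+
+ def iou_and_dice(preds, targets):
+     preds = preds.reshape(-1).astype(np.uint8)
+     targets = targets.reshape(-1).astype(np.uint8)
+     iou = jaccard_score(targets, preds, average='macro')   # avoids the multiclass ValueError
+     inter = np.logical_and(preds, targets).sum()
+     denom = preds.sum() + targets.sum()
+     dice = 2.0 * inter / denom if denom > 0 else 1.0
+     return iou, dice
+ ```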
130
+ 🗃️ Dataset and loader
131
+ Transformations kept with .convert("L") to ensure grayscale images and masks, despite the subsequent binarization.
132
+
133
+ Confirmation of data alignment between image and mask by direct checking in the loader.
134
+
135
+ 🛠️ DataSet Stability
136
+ Implemented an explicit check that each mask exists on disk, avoiding silent failures.
137
+
138
+ Pipeline preparation to detect problems early during data loading.
139
+
140
+ 💡 Observations
141
+ Despite the refinements, the model still produces coarse segmentations ("blocks"), but they already show an orientation consistent with the object (car).
142
+
143
+ Next steps include augmentation adjustments, expansion of the receptive field, and improvement of contour quality.
144
+
145
+
146
+ ## [v0.11.0] – 2025-07-13
147
+ 🔧 Pipeline and Logging Refactoring
148
+ 📄 Created the save_report() method to save training logs to a file continuously and securely (replaces print for traceability in production).
149
+
150
+ 🧠 Moved the num_epochs, checkpoint_interval, and early_stop_patience parameters to config.py to centralize experiment configuration.
151
+
152
+ ✅ Removed the fixed use of print() during training, facilitating use on clusters, remote notebooks, and reproducibility.
153
+
154
+ 🗂️ Updated train.py to run with zero dependency on manual changes: everything is configurable via config.
155
+
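+ A minimal sketch of what such a logging helper can look like (the default file name mirrors report_file.txt in this repository; the real helper and its configuration live in the training code):
+
+ ```python
+ from datetime import datetime
+
+ def save_report(row, report_file="report_file.txt"):
+     # Append one line to the training report so progress survives crashes
+     # and headless or remote runs (replaces ad-hoc print calls).
+     with open(report_file, "a", encoding="utf-8") as f:
+         f.write(f"{row}\n")
+
+ save_report(f"Starting training on: {datetime.now()}")   # usage example
+ ```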
156
+ 🧮 Metric Adjustments
157
+ ✅ Fixed and stabilized IoU and Dice Score calculations after each epoch, with automatic logging in the final report.
158
+
159
+ 🧪 Dice score calculated via np.logical_and and np.logical_or for greater accuracy and consistency with academic metrics.
160
+
161
+ 📊 Automatic saving of metrics graphs:
162
+
163
+ training_loss.png
164
+
165
+ training_val_accuracy.png
166
+
167
+ iou_history.png
168
+
169
+ dice_history.png
170
+
171
+ 🧬 Dataset
172
+ 🔁 Maintained robust preprocessing and mask binarization.
173
+
174
+ 💡 Validation of continuous image-mask alignment externally (via separate script) before training.
175
+
176
+ 🚀 Certification Preparation
177
+ 🎯 Stable modular structure, suitable for submission to platforms such as Hugging Face.
178
+
179
+ 🔒 Centralized logging ensures hardware traceability (CUDA and Torch versions recorded at the start of training).
180
+
181
+
182
+ ## [v0.10.0] – 2025-07-12
183
+ 🧾 Training Pipeline Refinements and Automated Reporting
184
+
185
+ 📋 Modularization and Centralized Configuration
186
+ 🔧 Critical parameters now defined via scripts/config.py:
187
+
188
+ num_epochs = 250
189
+
190
+ checkpoint_interval = 15
191
+
192
+ early_stop_patience = 60
193
+
194
+ Easy adjustments without changing the main training code
195
+
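+ An illustrative excerpt of what a centralized scripts/config.py can hold, using the values listed above (report_file and device are referenced elsewhere in this changelog):
+
+ ```python
+ # scripts/config.py (illustrative excerpt, not the full file)
+ import torch
+
+ num_epochs = 250
+ checkpoint_interval = 15
+ early_stop_patience = 60
+ report_file = "report_file.txt"
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ ```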
196
+ 📝 Execution Log with save_report()
197
+ ✅ Added save_report(row) method to log:
198
+
199
+ Library versions (Torch, CUDA)
200
+
201
+ Progress per epoch (Loss, Accuracy, IoU, Dice)
202
+
203
+ Training Start and End
204
+
205
+ Final Performance Summary
206
+
207
+ 📁 Logs automatically saved in config.report_file, allowing historical tracking and auditing of the execution
208
+
209
+ 💡 Notes
210
+ Code now ready for automated cluster executions, CI/CD, or continuous validation pipelines.
211
+
212
+ Standardization facilitates future integration with TensorBoard, Gradio, or custom dashboards.
213
+
214
+
215
+ ## [v0.9.0] – 2025-07-10
216
+ 🎯 Certification and Standards Compliance
217
+
218
+ 📦 Class Structure and Compliance
219
+ 🔄 Inverted dataset classes to follow the conventional pattern:
220
+
221
+ 0: Background
222
+
223
+ 1: Object
224
+
225
+ Avoids confusion in standard metrics such as CrossEntropy and Jaccard
226
+
227
+ 🧠 ReLU in Skip Connections
228
+ 🚀 Added F.relu(...) activations after up + skip sums in U-Net, improving the ability to learn nonlinearities between blocks
229
+
230
+ Fixes linear behavior of activations in the decoding phase
231
+
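+ A hypothetical decoder step showing the activation placement described above (the real blocks are defined in models.py; the channel handling is simplified here):
+
+ ```python
+ import torch.nn.functional as F
+ from torch import nn
+
+ class UpBlock(nn.Module):
+     def __init__(self, in_ch, out_ch):
+         super().__init__()
+         self.up = nn.ConvTranspose2d(in_ch, out_ch, kernel_size=2, stride=2)
+
+     def forward(self, x, skip):
+         up = self.up(x)
+         return F.relu(up + skip)   # non-linearity applied after the up + skip sum
+ ```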
232
+ ⚖️ Adjusted Class Balance
233
+ ⚙️ compute_class_weights() now uses a more robust normalization formula:
234
+
235
+ weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
236
+ weights = weights / weights.sum()
237
+ Avoids overfitting of the minority class without distorting learning.
238
+
239
+ 📊 Advanced Training Metrics
240
+ ✅ Calculation of IoU (Jaccard) and Dice Score on the validation set per epoch:
241
+
242
+ sklearn's jaccard_score()
243
+
244
+ Dice with intersection / union using NumPy
245
+
246
+ 📉 Stored as iou_history and dice_history, with graphs saved via matplotlib
247
+
248
+ 🖼️ Metrics Visualization
249
+ New graphs:
250
+
251
+ iou_history.png
252
+
253
+ dice_history.png
254
+
255
+ All graphs are saved directly, maintaining compatibility with CUDA/headless training environments.
256
+
257
+ 🧪 Stability and Diagnostics
258
+ Fixed the bug TypeError: Cannot interpret '-1' as a data type, caused by incorrect dtypes when calling np.concatenate on PyTorch tensors converted with .astype('int')
259
+
260
+ Validation data is now guaranteed to be np.uint8 to avoid dtype conflicts.
261
+
262
+ 💡 Notes
263
+ Model now follows the Hugging Face Vision Certification metrics standard.
264
+
265
+ Validated pipeline with clear training/validation separation, reliable metrics, and an extensible multiclass structure.
266
+
267
+
268
+ ## [v0.8.0] – 2025-07-09
269
+ 🧠 Pipeline Architecture and Reconstruction
270
+ ✅ Replaced the binary output model with a multiclass architecture (num_classes=2) with CrossEntropyLoss and softmax, allowing future expansion to multi-class segmentations.
271
+
272
+ 🧪 New composite loss function:
273
+
274
+ Implemented custom DiceLoss with smooth=1e-6 for greater sensitivity to contours and thin areas
275
+
276
+ Combined with class-weighted CrossEntropyLoss: loss = 0.5 * CrossEntropy + 0.5 * DiceLoss
277
+
278
+ 📊 Dynamic calculation of class weights:
279
+
280
+ Added compute_class_weights() method to balance the loss based on the actual pixel frequency per class in the dataset
281
+
282
+ Replaces previous fixed weights, automatically adapting to new datasets
283
+
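+ A sketch of the weighted composite criterion, reusing the weight formula quoted in the v0.9.0 entry above (the pixel counts are illustrative and the Dice term is the custom loss from diceLossCriterion.py):
+
+ ```python
+ import torch
+ import torch.nn as nn
+
+ def compute_class_weights(class_counts):
+     weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
+     return weights / weights.sum()
+
+ class_counts = torch.tensor([9.0e6, 1.0e6])   # illustrative background/object pixel counts
+ ce = nn.CrossEntropyLoss(weight=compute_class_weights(class_counts))
+
+ def combined_loss(logits, targets, dice_loss_fn):
+     # 0.5 * CrossEntropy + 0.5 * Dice, as described above
+     return 0.5 * ce(logits, targets) + 0.5 * dice_loss_fn(logits, targets)
+ ```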
284
+ 🧬 Dataset and Preprocessing
285
+ 🖼️ Updated Dataset SegmentationDataset:
286
+
287
+ Robust loading with mask presence check
288
+
289
+ Image and mask conversion to grayscale
290
+
291
+ Binarized masks with threshold (mask > 127).long() to ensure values {0, 1}
292
+
293
+ 🎨 Transformations:
294
+
295
+ Applied Resize (256×256) and normalized with mean=[0.5], std=[0.5] for single-channel input
296
+
297
+ 🏗️ ResNetUNet Model
298
+ 🔁 Reconstructed architecture based on resnet50 (pretrained=True):
299
+
300
+ Adapted conv1 for single-channel input
301
+
302
+ Skip connections with residual sum between encoder and decoder
303
+
304
+ Final upsample with nn.Upsample(scale_factor=2) to restore original resolution
305
+
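+ A minimal sketch of the single-channel adaptation of the encoder (the complete ResNetUNet, including decoder and skip connections, is defined in models.py):
+
+ ```python
+ from torch import nn
+ from torchvision.models import resnet50
+
+ encoder = resnet50(pretrained=True)
+ # Replace the first convolution so the network accepts 1-channel grayscale input.
+ encoder.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
+ ```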
306
+ 🏋️ Training and Monitoring
307
+ 📈 Training with:
308
+
309
+ AdamW with lr=1e-4 and weight_decay=1e-4
310
+
311
+ StepLR scheduler with gamma=0.5 every 10 epochs
312
+
313
+ Early stopping with patience=60
314
+
315
+ Checkpoints saved every 15 epochs
316
+
317
+ Automatic saving of the best model based on train_accuracy
318
+
319
+ 📊 Metrics:
320
+
321
+ Pixel accuracy for training and validation
322
+
323
+ History of loss and accuracy by epoch
324
+
325
+ Graphs saved as .png with plt.savefig() (without plt.show())
326
+
327
+ 💡 Notes
328
+ Model now prepared for multiclass segmentations with greater stability
329
+
330
+ More robust and modular pipeline, with a clear separation between architecture, dataset, loss, and training
331
+
332
+ Structure ready for integration with metrics such as IoU, F1-score, and visualization with TensorBoard
333
+
334
+
335
+ ## [v0.7.0] – 2025-07-07
336
+ 🧪 Advanced Binary Segmentation
337
+ ✅ Modified architecture: ResNetUNet model adjusted for single-channel output (num_classes=1), with sigmoid applied in the final step — prepared for smooth binary segmentation.
338
+
339
+ 🧠 Masks reformatted in the dataset: converted to float32 with shape [1, H, W] and binarized via threshold, optimizing compatibility with BCEWithLogitsLoss.
340
+
341
+ 🎯 New Composite Loss Function
342
+ ➕ Implemented custom Dice Loss to improve learning of contours and thin areas, combined with BCEWithLogitsLoss in equal weight.
343
+
344
+ 🧬 Formula applied: 0.5 * BCE + 0.5 * Dice, increasing the model's sensitivity to the real geometry of the segmented objects.
345
+
346
+ 🧮 Improved Pixel-Wise Evaluation
347
+ 📏 Pixel accuracy adjusted to consider sigmoid and binary threshold (0.5) in predictions before comparing with masks — makes the calculation more faithful to the purpose of segmentation.
348
+
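+ A minimal sketch of the adjusted pixel accuracy, assuming single-channel logits and masks already binarized to {0, 1}:
+
+ ```python
+ import torch
+
+ def pixel_accuracy(logits, masks, threshold=0.5):
+     # logits, masks: [B, 1, H, W]
+     preds = (torch.sigmoid(logits) > threshold).float()
+     return (preds == masks).float().mean().item()
+ ```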
349
+ 💡 Observations
350
+ Model now captures smoother contours, reducing "square" behavior.
351
+
352
+ Code now ready for integration with advanced metrics such as IoU, Precision/Recall per class, and image visualization with matplotlib or TensorBoard.
353
+
354
+
355
+ ## [v0.6.0] – 2025-07-07
356
+ 🧠 Training Pipeline Refinement
357
+ 🔁 Training now split into training and validation sets: automatic 80/20 split of the SegmentationDataset to monitor generalization.
358
+
359
+ 📊 Validation implemented per epoch with accuracy calculation on the validation set; metric used for early stopping and best_model.pt selection.
360
+
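+ A short sketch of the 80/20 split (the placeholder tensors stand in for the real SegmentationDataset from segDS.py):
+
+ ```python
+ import torch
+ from torch.utils.data import TensorDataset, random_split
+
+ dataset = TensorDataset(torch.zeros(10, 1, 64, 64), torch.zeros(10, 64, 64, dtype=torch.long))
+ n_train = int(0.8 * len(dataset))
+ train_set, val_set = random_split(dataset, [n_train, len(dataset) - n_train])
+ ```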
361
+ 📈 Generalization and Robustness
362
+ 🌈 Added augmentation transformations via RandomHorizontalFlip and RandomRotation on the training set, to make the model more resistant to visual variations.
363
+
364
+ ⏳ Early stopping increased: early_stop_patience increased from 20 to 60 epochs, giving more room for progressive learning.
365
+
366
+ 🔁 Hyperparameters and Regularization
367
+ 📉 Added weight_decay=1e-5 in the Adam optimizer for lightweight L2 regularization.
368
+
369
+ 🎯 Best model metric changed: now best_model.pt saves based on the best validation accuracy, not just training.
370
+
371
+ 📊 Results visualization
372
+ 🖼️ New graph generated training_val_accuracy.png showing the evolution of validation accuracy over epochs.
373
+
374
+ 📊 Graphs saved with plt.savefig() after try/except, avoiding failures in environments with graphics rendering issues via CUDA.
375
+
376
+ 💡 Observations
377
+ Model showed qualitative improvement in segmentation with smoother and more responsive contours — previous squares started to follow the car's rotation, indicating spatial learning.
378
+
379
+ Structure ready for future calculation of IoU per class and integration with TensorBoard, if necessary.
380
+
381
+
382
+ ## [v0.5.0] – 2025-07-05
383
+ ### ⚒️ Critical Data Alignment Fixes
384
+ Fixed mask file mismatch: Masks were stored as .png, but dataset loader expected .jpg extension — caused incorrect or failed loading
385
+
386
+ 🧠 Applied patch via .replace('.jpg', '.png') in the dataset loader to ensure proper image-mask pairing
387
+
388
+ Added FileNotFoundError checks during __getitem__ to avoid silent failures and improve debugging clarity
389
+
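+ A sketch of the extension patch and the existence check (the function name and directory layout are illustrative; the project's loader handles the exact mask naming):
+
+ ```python
+ import os
+
+ def mask_path_for(image_path, masks_dir):
+     name = os.path.basename(image_path).replace('.jpg', '.png')   # masks are stored as .png
+     path = os.path.join(masks_dir, name)
+     if not os.path.exists(path):
+         raise FileNotFoundError(f"Mask not found for {image_path}: {path}")
+     return path
+ ```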
390
+ ### 🧠 Dataset Refinements
391
+ Ensured matching Resize(256×256) transformations for both image and mask, using transforms.functional for consistency
392
+
393
+ Binarization of masks confirmed to produce only {0,1} values, avoiding grayscale range leakage
394
+
395
+ Validated with np.unique() on mask tensors — clean value range critical for CrossEntropyLoss
396
+
397
+
398
+ ---
399
+
400
+ ## [v0.4.0] – 2025-07-04
401
+
402
+ ### ✅ Major Improvements
403
+
404
+ - Added **checkpoint saving** every `N` epochs during training, configurable via a new `checkpoint_interval` parameter.
405
+ - Implemented **Early Stopping** based on pixel accuracy, with a configurable patience (`early_stop_patience`) to avoid overfitting.
406
+ - Final model now saved **twice**:
407
+ - `best_model.pt`: Only weights, for inference/embedded use
408
+ - Full model (`torch.save(model, ...)`) at the end, for future reloading
409
+
410
+ ### ⚠️ Critical Bug Diagnosed and Resolved
411
+
412
+ - **Symptom:** Model was training with no improvement; accuracy stuck; no learning observed
413
+ - **Diagnosis:** Masks loaded from `.jpg` were using full grayscale range `[0, 1, ..., 255]` instead of binary values `[0, 1]`
414
+ - **Fix:** Added diagnostic checks using `np.unique` to validate mask classes; incorporated a preprocessing step to binarize masks
415
+
416
+ ### 🧪 Experimental Enhancements
417
+
418
+ - Updated model evaluation interface (`evaluate_model.py`) for batch testing via folder traversal
419
+ - Separated Gradio demo (`app.py`) for certification usability evaluation
420
+ - Integrated plotting of loss and accuracy with graceful error handling (wrapped in `try/except`)
421
+
422
+ ### 🧠 Observations
423
+
424
+ - Problem with **matplotlib crashing** due to CUDA context when using `plt.show()` after training; workaround applied with `plt.savefig()` only
425
+ - CUDA kernel mismatch on certain environments using **dual RTX 4060** detected as 3060 — resolved by adjusting `torch` + `nvidia-driver` stack (manual)
426
+ - Added check to confirm training device (`config.device`) and `torch.cuda.get_arch_list()` for future reproducibility
427
+
428
+ ### 🤝 Acknowledgements
429
+
430
+ - Much of the model debugging was assisted by real-time reasoning and exploration with **ChatGPT**, especially around mask encoding and loss mismatch.
431
+ - Initial development relied on **GitHub Copilot**, with ChatGPT joining later to refactor, modularize, and refine robustness for submission.
432
+
433
+ ---
434
+
435
+ ## [v0.3.0] – 2025-06-22
436
+
437
+ 🐛 Bug Fixes
438
+ - Fixed `RuntimeError: only batches of spatial targets supported (3D tensors)` caused by mask dimensions
439
+ - Applied `.squeeze(1)` to target tensors before passing to `CrossEntropyLoss`, ensuring correct shape `(B, H, W)`
440
+ - Root cause: mask loaded with shape `(B, 1, H, W)` instead of `(B, H, W)`
441
+
442
+ 👁️ Observations
443
+ - Issue identified during initial model training with grayscale images and ResNet-based U-Net
444
+ - Fix reduces debugging time from hours to seconds — thanks to a productive collaboration with Microsoft Copilot 🧠
445
+
446
+ ---
447
+
448
+ ## [v0.2.0] – 2025-06-22
449
+
450
+ ### 🔧 Project Restructure
451
+ - Fully reorganized project files to reflect a modular and scalable architecture
452
+ - Added new root folders:
453
+ - `DataSet/Cow_Segmentation_Dataset/` to centralize all data and annotations
454
+ - `scripts/Dataset/` for preprocessing and data preparation logic
455
+ - `scripts/Segmentation/` for training, evaluation, and model utilities
456
+ - `scripts/Segmentation/Future/` to house experimental/embedded extensions
457
+
458
+ ### 📑 Documentation Updates
459
+ - Updated `README.md` to match new folder organization and include a **Future Work** section
460
+ - Updated `model_card.md` to reflect modular design and embedded plans
461
+
462
+ ### 💡 Future-Ready Additions
463
+ - Introduced experimental script `train_embedded_explicit_model.py` for ONNX export and embedded deployment (not yet validated)
464
+
465
+ ---
466
+
467
+ ## [v0.1.0] – Initial Release
468
+
469
+ ### 🚀 Baseline Functionality
470
+ - Preprocessing scripts for grayscale mask generation and dataset formatting
471
+ - Training and evaluation scripts for custom segmentation model
472
+ - Initial model card and license
473
+
474
+ ---
475
+
476
+ ## [v0.0.1] – Project Start
477
+
478
+ - Initial discussion on using ResNet as an encoder in U-Net
479
+ - Creation of an example synthetic dataset
480
+ - Structuring of the basic inference script
481
+ - Validation of the visual pipeline and preprocessing strategy
DataSet/ExtraTests/29bb3ece3180_11.jpg ADDED

Git LFS Details

  • SHA256: 23e473f7d4aac81292ae9720a42ba75323201a5557b668b22ad824b2bcee0a3a
  • Pointer size: 131 Bytes
  • Size of remote file: 110 kB
DataSet/annotations/classes.txt ADDED
@@ -0,0 +1,2 @@
1
+ 0 - Fundo
2
+ 1 - Objeto
DataSet/images/0cdf5b5d0ce1_01.jpg ADDED

Git LFS Details

  • SHA256: f48d6a7cbd4f1d7c4341143fe64f7ad4d328d6b426b1be3efb23180e8484cd45
  • Pointer size: 130 Bytes
  • Size of remote file: 19.1 kB
DataSet/masks/0cdf5b5d0ce1_01_mask.png ADDED

Git LFS Details

  • SHA256: 4e2d7ae86d5cd88d804d14cf6a8004876d0390aa09cb482edb81d3aae6d45705
  • Pointer size: 129 Bytes
  • Size of remote file: 4.27 kB
LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 André Costa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the “Software”), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9
+ of the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16
+ INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
17
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md CHANGED
@@ -1,3 +1,345 @@
1
- ---
2
- license: mit
3
- ---
1
+ # Image Segmentation with ResNet + U-Net
2
+
3
+ 💡 The ResNet + U-Net fusion combines deep, contextual vision (ResNet) with spatial fidelity and accuracy in the details (U-Net).
4
+ It is a versatile, powerful, and highly sensitive architecture - ideal for projects where every pixel matters.
5
+ The model shines in scenarios where the object is small, detailed, or textured, and the global scene context offers little help.
6
+ This makes it ideal for:
7
+ - Medical segmentation (e.g., tumors, vessels)
8
+ - Industrial defect inspection
9
+ - Embedded vision for robotics or quality control
10
+ ⚠️ However, this current version was trained on a **narrow-domain dataset**, collected under controlled indoor conditions — consistent lighting, high-contrast backgrounds, and fixed camera angles. As a result, its ability to generalize to open-world scenarios (e.g., outdoor images, different backgrounds) is limited.
11
+ **This is not a flaw of the model**, but a **natural reflection of its training data**. When retrained with more diverse and realistic datasets, this architecture has strong potential for robust performance in general-purpose segmentation tasks.
12
+
13
+
14
+ ## 📌 Class Convention
15
+ This project follows the standard:
16
+
17
+ - Class 0: Background
18
+ - Class 1: Segmented Object
19
+
20
+ All masks were converted to reflect this convention before training.
21
+
22
+ ## 🌐 Limitations and Considerations
23
+ This model was trained with images captured in a highly controlled environment: constant lighting, a clean background, and objects (cars) positioned on a rotating platform.
24
+
25
+ As a result, it achieves very high accuracy (IoU > 99%) when evaluated on images similar to those in the original dataset. However, its performance deteriorates significantly when exposed to images collected outdoors, with variations in light, angle, background, and perspective.
26
+
27
+ This limitation was expected and will be taken into account for future versions with more diverse datasets.
28
+
29
+ Good image:
30
+ ![Good image](./good_image.png "Segmentation under ideal studio lighting")
31
+
32
+ Bad image:
33
+ ![Bad image](./bad_image.png "Failure example with an open-world street background")
34
+
35
+
36
+ ## 🌟 Objective
37
+
38
+ To segment objects in custom grayscale images based on manual annotations, using a complete training pipeline, automated inference, and visual mask validation.
39
+
40
+ ## 🤖 Notes on Development
41
+
42
+ This project was born after many hours of experimentation, learning and progress driven by caffeine.
43
+ Unlike other projects I have participated in before, this one evolved incredibly quickly thanks to the support of artificial intelligence such as Copilot (Microsoft) and ChatGPT (OpenAI). Without a doubt, these are tools that are way ahead of their time.
44
+ As part of the experience of using and learning from these advanced AI tools, I always threw problems at both of them, to measure their performance and compare their responses. And to make the experience more fun, I kept an extremely formal dialogue with one and not at all formal with the other to see how they would react. And after a while, I reversed it, now being informal with the one that was previously formal and vice versa.
45
+ Big thanks to both copilots — one named Microsoft, the other simply GPT.
46
+ - Powered by: PyTorch, Gradio, OpenCV, Matplotlib, and Hugging Face Datasets
47
+
48
+
49
+ ## 📁 Project Structure
50
+ .
51
+ ├── run_app.py
52
+ ├── bad_image.png
53
+ ├── CHANGELOG.md
54
+ ├── checkpoints
55
+ │   ├── best_model.pt
56
+ │   └── modelo_completo.pth
57
+ ├── DataSet
58
+ │   ├── annotations
59
+ │   │   └── classes.txt
60
+ │   ├── ExtraTests
61
+ │   ├── images
62
+ │   └── masks
63
+ ├── dice_history.png
64
+ ├── run_evaluate.py
65
+ ├── good_image.png
66
+ ├── __init__.py
67
+ ├── iou_history.png
68
+ ├── LICENSE
69
+ ├── model_card.md
70
+ ├── .huggingface
71
+ │   └── model-index.yaml
72
+ ├── README.md
73
+ ├── report_file.txt
74
+ ├── requirements.txt
75
+ ├── scripts
76
+ │   ├── config.py
77
+ │   ├── Dataset
78
+ │   │   ├── ConvertFormat.py
79
+ │   │   ├── dataAugmentation.py
80
+ │   │   ├── deleteDuplicates.py
81
+ │   │   ├── getDS_HuggingFace.py
82
+ │   │   ├── getImages.py
83
+ │   │   ├── grays.py
84
+ │   │   ├── __init__.py
85
+ │   │   ├── mask_diagnosis.py
86
+ │   │   ├── masks.py
87
+ │   │   ├── Rename.py
88
+ │   │   ├── Resize.py
89
+ │   │   ├── TrainVal.py
90
+ │   │   └── validMasks.py
91
+ │   ├── __init__.py
92
+ │   └── Segmentation
93
+ │   ├── app.py
94
+ │   ├── augment.py
95
+ │   ├── diceLossCriterion.py
96
+ │   ├── evaluate_model.py
97
+ │   ├── flagged
98
+ │   ├── focalLoss.py
99
+ │   ├── Future
100
+ │   ├── __init__.py
101
+ │   ├── models.py
102
+ │   ├── segDS.py
103
+ │   └── train.py
104
+ ├── structure.txt
105
+ ├── training_loss.png
106
+ └── training_val_accuracy.png
107
+
108
+ ### 📁 Root Directory
109
+ | Name | Description |
110
+ |--------------------------|-----------------------------------------------------------------------------|
111
+ | `run_app.py` | Launcher script for the Gradio inference interface (wraps `scripts/Segmentation/app.py`) |
112
+ | `bad_image.png` | Example of a failed prediction (for benchmarking or documentation) |
113
+ | `good_image.png` | Example of a successful prediction (used for showcasing model quality) |
114
+ | `CHANGELOG.md` | History of changes and version updates |
115
+ | `checkpoints/` | Contains trained model files (`best_model.pt`, `modelo_completo.pth`) |
116
+ | `DataSet/` | Contains training images, masks, annotations, and extra test sets |
117
+ | `dice_history.png` | Visualization of Dice score progression during training |
118
+ | `iou_history.png` | Graph of Intersection over Union (IoU) evolution across epochs |
119
+ | `training_loss.png` | Plot showing model loss evolution throughout training |
120
+ | `training_val_accuracy.png` | Graph of validation accuracy during model training |
121
+ | `run_evaluate.py` | Evaluation script runnable from root — assesses model performance |
122
+ | `__init__.py` | Declares root as a Python package (if imported externally) |
123
+ | `LICENSE` | Legal terms for usage and redistribution |
124
+ | `model_card.md` | Technical summary of model details, performance, and intended use |
125
+ | `.huggingface/model-index.yaml` | Configuration file for Hugging Face model registry (optional export) |
126
+ | `README.md` | Main documentation file — project overview, usage, and setup guide |
127
+ | `report_file.txt` | Training log and report output saved during execution |
128
+ | `requirements.txt` | List of dependencies needed for running the project |
129
+ | `scripts/` | Main logic for training, evaluation, dataset preparation, and modeling |
130
+ | `structure.txt` | Manual export of the folder structure, used as reference or debug aid |
131
+
132
+ ### 📁 DataSet/
133
+ | Name | Description |
134
+ |-------------------|---------------------------------------------------------------------------------|
135
+ | `annotations/` | Contains `classes.txt`, defining class labels used in segmentation |
136
+ | `images/` | Input images used for training and evaluation |
137
+ | `masks/` | Segmentation masks aligned with input images |
138
+ | `ExtraTests/` | Optional dataset with additional test cases for generalization assessment |
139
+
140
+ ### 📁 scripts/
141
+ | Name | Description |
142
+ |----------------------|-------------------------------------------------------------------------------|
143
+ | `config.py` | Configuration module holding paths, flags, and hyperparameters |
144
+ | `__init__.py` | Declares `scripts/` as an importable Python module |
145
+
146
+
147
+ ### 📁 scripts/Dataset/
148
+ | Name | Description |
149
+ |------------------------|-----------------------------------------------------------------------------|
150
+ | `ConvertFormat.py` | Converts image or annotation formats (e.g. from JPG to PNG, or COCO to mask)|
151
+ | `dataAugmentation.py` | Applies offline augmentations to images or masks |
152
+ | `deleteDuplicates.py` | Detects and removes duplicate samples |
153
+ | `getDS_HuggingFace.py` | Downloads datasets from Hugging Face 🤗 |
154
+ | `getImages.py` | Image retrieval or organization from storage |
155
+ | `grays.py` | Converts images to grayscale |
156
+ | `mask_diagnosis.py` | Validates and diagnoses potential issues in masks |
157
+ | `masks.py` | Performs manipulation or binarization of segmentation masks |
158
+ | `Rename.py` | Batch renaming utility to standardize filenames |
159
+ | `Resize.py` | Resizes images and masks to uniform dimensions |
160
+ | `TrainVal.py` | Performs dataset train/validation splitting |
161
+ | `validMasks.py` | Checks for validity in mask formatting and values |
162
+ | `__init__.py` | Declares `Dataset/` as a Python package |
163
+
164
+
165
+ ### 📁 scripts/Segmentation/
166
+ | Name | Description |
167
+ |------------------------|-----------------------------------------------------------------------------|
168
+ | `app.py` | Local interface for model inference — CLI or GUI |
169
+ | `augment.py` | Online augmentations and Test-Time Augmentation (TTA) |
170
+ | `diceLossCriterion.py` | Custom Dice Loss implementation for segmentation |
171
+ | `focalLoss.py` | Custom Focal Loss implementation to handle class imbalance |
172
+ | `evaluate_model.py` | Model evaluator with metrics like IoU, Dice, and pixel accuracy |
173
+ | `models.py` | Contains neural network architecture (e.g. UNet based on ResNet) |
174
+ | `segDS.py` | Dataset class for segmentation tasks, loading images and masks |
175
+ | `train.py` | Main training script with logging, plotting, checkpointing, and early stop |
176
+ | `Future/` | Experimental code including auto hyperparameter tuning |
177
+ | `flagged/` | Optional output folder for flagged evaluations or debug samples |
178
+ | `__init__.py` | Declares `Segmentation/` as a Python package |
179
+
180
+ ## Dataset
181
+ This project uses data from the [CV Image Segmentation Dataset](https://www.kaggle.com/datasets/antoreepjana/cv-image-segmentation), which provides paired images and masks for semantic segmentation tasks.
182
+ The dataset contains several distinct data subsets.
183
+ Only the Carvana car images (Kaggle Carvana Car Mask Segmentation) were used; this subset was the dataset used to test the project.
184
+
185
+ The data subset used for the project was pre-processed by running the project's scripts in the following order (see the sketch after the list):
186
+ 1 - Run getImages.py #Or use other data sources.
187
+ 2 - Visually inspect the collected images.
188
+ 3 - Run deleteDuplicates.py
189
+ 4 - Run ConvertFormat.py
190
+ 5 - Run Resize.py (Must be run for both the image and mask directories).
191
+ 6 - Run grays.py (Must be run for both the image and mask directories).
192
+ 7 - Make annotations.
193
+ 8 - Run masks.py
194
+ 9 - Run validMasks.py
195
+ 10 - Run TrainVal.py
196
+
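+ A sketch of that run order from the repository root (the invocation style is an assumption; each script reads its own paths and configuration):
+
+ ```bash
+ python scripts/Dataset/getImages.py        # or use other data sources
+ python scripts/Dataset/deleteDuplicates.py
+ python scripts/Dataset/ConvertFormat.py
+ python scripts/Dataset/Resize.py           # run for both the images/ and masks/ directories
+ python scripts/Dataset/grays.py            # run for both the images/ and masks/ directories
+ # make annotations, then:
+ python scripts/Dataset/masks.py
+ python scripts/Dataset/validMasks.py
+ python scripts/Dataset/TrainVal.py
+ ```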
197
+ ---
198
+
199
+ ## ⚙️ Model
200
+
201
+ * Architecture: ResNet encoder + U-Net decoder
202
+ * Input: 1-channel grayscale, resized to 512×512
203
+ * Loss: Cross Entropy Loss with class weighting
204
+ * Optimizer: Adam
205
+ * Scheduler: StepLR with decay
206
+ * Training duration: configurable (default: 400 epochs)
207
+ * Early Stopping: based on accuracy stagnation
208
+ * Checkpoints: saved every N epochs + best model saved
209
+
210
+ Training script: `scripts/Segmentation/train.py`
211
+ Evaluation scripts:
212
+
213
+ * `scripts/Segmentation/evaluate_model.py`: Batch evaluation over image folders
214
+ * `scripts/Segmentation/app.py`: Gradio demo for interactive inference
215
+
216
+ * `run_app.py`: Wrapper script to launch the Gradio interface from the root directory (calls scripts/Segmentation/app.py internally)
217
+ * `run_evaluate.py`: wrapper script to launch the general pre-testing script from the root directory (calls scripts/Segmentation/evaluate_model.py internally)
218
+ 📄 The model is documented and registered via model-index.yaml for proper listing on Hugging Face Hub.
219
+
220
+ ---
221
+
222
+ ## 📈 Evaluation
223
+
224
+ Quantitative metrics include:
225
+
226
+ * Intersection over Union (IoU)
227
+ * Dice coefficient
228
+ * Accuracy, Precision, Recall
229
+ * Balanced Accuracy and MCC
230
+
231
+ Visual inspection is supported via overlay masks in the ExtraTests/ folder.
232
+
233
+ ![training validation accuracy](./training_val_accuracy.png)
234
+
235
+ ![training loss](./training_loss.png)
236
+
237
+ ![iou_history](./iou_history.png)
238
+
239
+ ![dice_history](./dice_history.png)
240
+
241
+ ---
242
+
243
+ ## 🔬 Future Work
244
+
245
+ The directory `scripts/Segmentation/Future/` includes planned extensions for embedded deployment:
246
+
247
+ * `train_embedded_explicit_model.py`: A simplified and modular training script for generating lightweight ONNX models.
248
+ Note: This script was not executed or validated during this certification phase.
249
+
250
+ ---
251
+
252
+ ## 🏗 Deployment Options
253
+
254
+ This project includes two scripts for model evaluation:
255
+
256
+ ### 🧪 Batch Evaluation Script (`evaluate_model.py`)
257
+
258
+ Use this script to run the model on an entire directory of test images. Ideal for debugging, validation, and quantitative analysis.
259
+
260
+ ```bash
261
+ python evaluate_model.py --input ./your-test-images/
262
+ ```
263
+
264
+ You can modify this script to save prediction masks, compute metrics (IoU, pixel accuracy), or visualize results in batch.
265
+
266
+ ---
267
+
268
+ ### 🌐 Interactive Web Demo (`app.py`)
269
+
270
+ This script provides an interactive interface using [Gradio](https://www.gradio.app/). It's designed for easy deployment and model demonstration, such as on Hugging Face Spaces.
271
+
272
+ To launch the web app locally:
273
+
274
+ ```bash
275
+ python app.py
276
+ ```
277
+
278
+ Or try it online (if hosted):
279
+
280
+ 👉 [Live demo on Hugging Face Spaces](https://huggingface.co/spaces/seu-usuario/seu-modelo) *TODO: (link will be updated after submission)*
281
+
282
+
283
+ This interface allows anyone to upload an image and instantly see the segmentation results — no installation required.
284
+
285
+ ---
286
+
287
+ 📌 **Tip**: Use `evaluate_model.py` during development and testing, and `app.py` for sharing and showcasing your model.
288
+
289
+ ---
290
+
291
+ ## 🏆 Certification Context
292
+
293
+ This repository was submitted for the Hugging Face Computer Vision Certification and is built upon reproducibility, modularity, dataset transparency, and technical rigor.
294
+
295
+ ---
296
+
297
+ ## 📄 License
298
+
299
+ This project is licensed under the MIT License.
300
+ Dataset usage must comply with the original Kaggle dataset license terms.
301
+
302
+ ---
303
+
304
+ ## 🔮 Future improvements
305
+
306
+ Some steps are already planned for the project's evolution:
307
+
308
+ * Architecture refinement: test lighter variants (e.g. ResNet18, MobileNetV3) to compare performance in embedded environments.
309
+ * Training with data augmentation: use Data Augmentation strategies (rotation, noise, scale, brightness) to increase model robustness.
310
+ * Cross-validation: include a cross-validation strategy to increase confidence in metrics.
311
+ * Conversion to ONNX/TensorRT: prepare an exportable version of the model for inference on edge devices.
312
+ * Deployment on specific hardware: test inference on ESP32-S3 or Raspberry Pi using a simplified pipeline with float16.
313
+ * Visualization interface: create a simple script or panel that allows you to upload an image and view the segmentation live.
314
+
315
+ These improvements will be implemented as the project progresses, keeping the focus on lightness, modularity, and real applicability in computer vision with monochromatic images.
316
+
317
+ ---
318
+
319
+ ## 🌟 Final thoughts: why this certification matters
320
+
321
+ This project represents more than just completing a technical challenge. For me, it is the fulfillment of a long-held dream — to earn a professional certification that values knowledge, practice, and the ability to solve real-world problems, rather than just familiarity with specific versions of tools or frameworks.
322
+
323
+ For many years, I experienced the frustrating side of commercial certifications that felt more like traps than opportunities: exams based on obsolete technologies, questionable application centers, and mechanisms that created more obstacles than recognition. That never represented who I am — or what I am capable of building.
324
+
325
+ This certification, promoted by Hugging Face, is different. It validates true competencies in machine learning and computer vision based on a real-world project, executed end-to-end. It is a type of recognition that carries technical, ethical, and personal value.
326
+
327
+ That is why it is not “just another delivery.” It is a turning point.
328
+
329
+
330
+ ---
331
+
332
+ ## 🌟 Important notes…
333
+
334
+ 1) The IDE used in the project was Eclipse (https://eclipseide.org/) with the PyDev module (https://www.pydev.org/). In this environment it was necessary to add the project path to PyDev's PYTHONPATH so that the imports of some files, such as config.py, resolve correctly.
335
+
336
+ 2) The model is being trained with the "train.py" script.
337
+ However, there is a second training script called "cyber_train.py."
338
+ This is an empirical test I'm conducting. A little research of my own.
339
+ In "train," the hyperparameters are chosen manually.
340
+ In "cyber_train," the script will run 25 short training sessions, each lasting 5 epochs, to test the hyperparameters within the established limits and determine the best ones. Then, the actual training will be performed using the best hyperparameters detected.
341
+ And where does my empirical research come in?
342
+ I'm training first with the simplest version of the script, measuring how long it takes me to arrive at a model with a good accuracy percentage.
343
+ Once this is done, I'll run the automated version...
344
+ Then, I'll compare which of the two models performed better and how long it took me to achieve each one...
345
+ This will serve as a reference for more accurate trade-offs in future projects.
__init__.py ADDED
File without changes
bad_image.png ADDED

Git LFS Details

  • SHA256: 59fdd2c69c6fd9c5c63e64c567e97082de4ddee9975a94e3974a3d885f02a413
  • Pointer size: 131 Bytes
  • Size of remote file: 232 kB
checkpoints/best_model.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:196deed7297ecec59a59b61d6f93fb85655bc9130b4a3360c2a45117f12296ed
3
+ size 188378594
dice_history.png ADDED

Git LFS Details

  • SHA256: 4eeb398651703b9c3421ca7450077954699257d8c0b87d1b04ce02aedf8f76a6
  • Pointer size: 130 Bytes
  • Size of remote file: 39.3 kB
good_image.png ADDED

Git LFS Details

  • SHA256: 0c59a27058fea8007b1ebccfe2e68099c86c81d4075f279ad8df7dbcc2f11f73
  • Pointer size: 131 Bytes
  • Size of remote file: 147 kB
iou_history.png ADDED

Git LFS Details

  • SHA256: 84fcfa154f6091b8f9fcaaac86c02ca12d2f464a0d170263f506573e2093a154
  • Pointer size: 130 Bytes
  • Size of remote file: 40.5 kB
model_card.md ADDED
@@ -0,0 +1,122 @@
1
+ # 📑 Model Card – Segmentation
2
+
3
+ ## 🧾 Overview
4
+
5
+ 💡 **ResNet + U-Net fusion** combines deep and contextual vision (ResNet) with spatial fidelity and precision in details (U-Net).
6
+ It is a versatile, powerful, and high-sensitivity architecture — ideal for projects where **every pixel matters**.
7
+
8
+ The model excels in scenarios where the object is **small, detailed, or textured**, and where the **global scene context offers little help**.
9
+
10
+ This makes it ideal for:
11
+ - Medical segmentation (e.g., tumors, vessels)
12
+ - Industrial defect inspection
13
+ - Embedded vision for robotics or precision tasks
14
+
15
+ ⚠️ However, this specific version was trained on a **narrow-domain dataset**, captured under **controlled indoor conditions**: consistent lighting, high-contrast backgrounds, and fixed camera angles.
16
+ As a result, its ability to generalize to open-world scenarios (e.g., outdoor environments, variable backgrounds) is limited.
17
+
18
+ **This is not a flaw in the model**, but a **natural reflection of the training data**.
19
+ When retrained with more diverse and realistic datasets, this architecture is highly capable of delivering robust performance across a wide range of segmentation tasks.
20
+
21
+ ---
22
+
23
+ ## ☕ Behind the Scenes
24
+
25
+ This certification project was built one commit at a time — powered by curiosity, long debugging sessions, strategic doses of caffeine, and great support from **Microsoft Copilot** and **ChatGPT (OpenAI)**, whose insights were essential in structuring the segmentation pipeline and planning its embedded future.
26
+
27
+ > "Every time the model tries to segment, the square figure resurfaces. Not as an error, but as a reminder: deep learning can be quite shallow when the curse of imperfect geometry sets in.
28
+ > And even when all the code is rewritten, the world is realigned, and optimism rises again… there she is: the misshapen quadratic figure.
29
+ > Unfazed, unshakeable, perhaps even moved by her own stubbornness. She's not a bug — she's a character."
30
+
31
+ ---
32
+
33
+ ## 🗂️ Dataset
34
+
35
+ This model was trained using a subset of the [CV Image Segmentation Dataset](https://www.kaggle.com/datasets/antoreepjana/cv-image-segmentation), available on Kaggle.
36
+
37
+ - **Author**: Antoreep Jana
38
+ - **License**: For educational and non-commercial use
39
+ - **Content**: 300+ annotated images for binary segmentation
40
+ - **Preprocessing**: All images resized to 512×512 and converted to grayscale
41
+
42
+ ⚠️ *Only a filtered and preprocessed subset (related to car images) was used for this version.*
43
+ The dataset contains several distinct data subsets.
44
+ Only the Carvana car images (Kaggle Carvana Car Mask Segmentation) were used; this subset was the dataset used to test the project.
45
+
46
+ ---
47
+
48
+ ## ⚙️ Model Architecture
49
+
50
+ - **Encoder**: ResNet-50 (pretrained, adapted for 1-channel input)
51
+ - **Decoder**: U-Net with skip connections and bilinear upsampling
52
+ - **Input**: Grayscale, 512×512
53
+ - **Output**: Binary segmentation mask (background vs. object)
54
+ - **Loss**: Composite of `CrossEntropyLoss + DiceLoss`
55
+ - **Framework**: PyTorch
56
+
57
+ ---
58
+
59
+ ## 📊 Evaluation Metrics
60
+
61
+ - Pixel Accuracy (train/val)
62
+ - Dice Coefficient
63
+ - CrossEntropy Loss
64
+ - Class-weighted loss balancing
65
+ - *(IoU, MCC, Precision/Recall planned for future integration)*
66
+
67
+ 🧪 Evaluation performed using `evaluate_model.py`
68
+
69
+ ---
70
+
71
+ ## ⚠️ Limitations
72
+
73
+ This model achieves excellent results when tested on **studio-like images**: consistent lighting, neutral backgrounds, and static perspectives.
74
+
75
+ However, performance decreases on **unseen outdoor scenarios** (e.g., cars on the street, parking lots) — where background noise, lighting variation, and camera angle impact results.
76
+
77
+ ➡️ This **limitation is dataset-induced**, not architectural.
78
+ When trained on more realistic data, this model generalizes well due to its high sensitivity to texture and spatial structure.
79
+
80
+ ---
81
+
82
+ ## 🚀 Intended Use
83
+
84
+ Best suited for applications where conditions are similar to the training set, such as:
85
+
86
+ - Quality control in automotive photography studios
87
+ - Automated documentation of vehicles in inspection booths
88
+ - Offline image processing for structured, grayscale datasets
89
+
90
+ ---
91
+
92
+ ## 💡 Recommendations
93
+
94
+ To deploy in open-world environments (e.g., mobile robots, outdoor cameras), it is strongly recommended to **retrain or fine-tune** the model using a **more heterogeneous dataset**.
95
+
96
+ ---
97
+
98
+ ## 🔬 Planned Extensions
99
+
100
+ The following experimental modules are under active development and may be integrated in future releases:
101
+
102
+ 1️⃣ **Embedded Deployment Pipeline**
103
+ - Export to ONNX format with float16 precision
104
+ - C++ reimplementation targeting edge devices such as ESP32-S3 and STM32H7
105
+ - Lightweight modular training script:
106
+ `scripts/Segmentation/Future/train_embedded_explicit_model.py`
107
+ *Status: Experimental – not validated in this version*
108
+
109
+ 2️⃣ **Automated Hyperparameter Optimization**
110
+ - Training script that performs automatic hyperparameter search and tuning before final training
111
+ - Designed to improve efficiency and reduce manual configuration
112
+ - Script:
113
+ `scripts/Segmentation/Future/cyber_train.py`
114
+ *Status: Experimental – not validated in this version*
115
+
116
+
117
+ ---
118
+
119
+ ## 🪪 Licensing
120
+
121
+ - **Code**: MIT License
122
+ - **Dataset**: Attribution required (as per Kaggle contributor)
report_file.txt ADDED
@@ -0,0 +1,56 @@
1
+ 2.3.1+cu121
2
+ 12.1
3
+ ['sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_90']
4
+
5
+
6
+
7
+ Starting training on: 2025-07-22 09:53:58.111068
8
+ Epoch 1/100, Loss: 318.6268, Train Acc: 0.8727, Val Acc: 0.9918, IoU: 0.9758, Dice: 0.9806
9
+ 🔸 New best model at epoch 1 (acc: 0.8727) — saving best_model.pt
10
+ Epoch 2/100, Loss: 269.4433, Train Acc: 0.8604, Val Acc: 0.9852, IoU: 0.9565, Dice: 0.9645
11
+ Epoch 3/100, Loss: 38.0319, Train Acc: 0.8820, Val Acc: 0.9817, IoU: 0.9477, Dice: 0.9575
12
+ 🔸 New best model at epoch 3 (acc: 0.8820) — saving best_model.pt
13
+ Epoch 4/100, Loss: 31.6814, Train Acc: 0.8829, Val Acc: 0.9211, IoU: 0.8126, Dice: 0.8404
14
+ 🔸 New best model at epoch 4 (acc: 0.8829) — saving best_model.pt
15
+ Epoch 5/100, Loss: 26.3503, Train Acc: 0.8828, Val Acc: 0.9449, IoU: 0.8603, Dice: 0.8827
16
+ Epoch 6/100, Loss: 21.8011, Train Acc: 0.8823, Val Acc: 0.9263, IoU: 0.8224, Dice: 0.8491
17
+ Epoch 7/100, Loss: 19.5440, Train Acc: 0.8828, Val Acc: 0.9199, IoU: 0.8102, Dice: 0.8381
18
+ Epoch 8/100, Loss: 28.5497, Train Acc: 0.8818, Val Acc: 0.9641, IoU: 0.9036, Dice: 0.9204
19
+ Epoch 9/100, Loss: 23.4882, Train Acc: 0.8827, Val Acc: 0.9236, IoU: 0.8172, Dice: 0.8444
20
+ Epoch 10/100, Loss: 18.1255, Train Acc: 0.8838, Val Acc: 0.9187, IoU: 0.8079, Dice: 0.8361
21
+ 🔸 New best model at epoch 10 (acc: 0.8838) — saving best_model.pt
22
+ Epoch 11/100, Loss: 16.7196, Train Acc: 0.8844, Val Acc: 0.9135, IoU: 0.7985, Dice: 0.8278
23
+ 🔸 New best model at epoch 11 (acc: 0.8844) — saving best_model.pt
24
+ Epoch 12/100, Loss: 35.1093, Train Acc: 0.8820, Val Acc: 0.9400, IoU: 0.8503, Dice: 0.8741
25
+ Epoch 13/100, Loss: 17.0448, Train Acc: 0.8827, Val Acc: 0.9333, IoU: 0.8365, Dice: 0.8618
26
+ Epoch 14/100, Loss: 15.8128, Train Acc: 0.8842, Val Acc: 0.9202, IoU: 0.8110, Dice: 0.8391
27
+ Epoch 15/100, Loss: 14.9408, Train Acc: 0.8841, Val Acc: 0.9278, IoU: 0.8256, Dice: 0.8521
28
+ Epoch 16/100, Loss: 20.7117, Train Acc: 0.8830, Val Acc: 0.9222, IoU: 0.8149, Dice: 0.8426
29
+ Epoch 17/100, Loss: 15.2986, Train Acc: 0.8838, Val Acc: 0.9214, IoU: 0.8132, Dice: 0.8410
30
+ Epoch 18/100, Loss: 13.8610, Train Acc: 0.8831, Val Acc: 0.9273, IoU: 0.8245, Dice: 0.8511
31
+ Epoch 19/100, Loss: 13.5508, Train Acc: 0.8831, Val Acc: 0.9360, IoU: 0.8418, Dice: 0.8665
32
+ Epoch 20/100, Loss: 12.6651, Train Acc: 0.8823, Val Acc: 0.9171, IoU: 0.8050, Dice: 0.8336
33
+ Epoch 21/100, Loss: 12.4524, Train Acc: 0.8844, Val Acc: 0.9254, IoU: 0.8208, Dice: 0.8478
34
+ 🔸 New best model at epoch 21 (acc: 0.8844) — saving best_model.pt
35
+ Epoch 22/100, Loss: 11.9378, Train Acc: 0.8848, Val Acc: 0.9184, IoU: 0.8073, Dice: 0.8356
36
+ 🔸 New best model at epoch 22 (acc: 0.8848) — saving best_model.pt
37
+ Epoch 23/100, Loss: 11.6401, Train Acc: 0.8844, Val Acc: 0.9239, IoU: 0.8178, Dice: 0.8449
38
+ Epoch 24/100, Loss: 11.1857, Train Acc: 0.8835, Val Acc: 0.9269, IoU: 0.8237, Dice: 0.8505
39
+ Epoch 25/100, Loss: 25.1642, Train Acc: 0.8828, Val Acc: 0.9221, IoU: 0.8147, Dice: 0.8424
40
+ Epoch 26/100, Loss: 13.8112, Train Acc: 0.8839, Val Acc: 0.9135, IoU: 0.7987, Dice: 0.8280
41
+ Epoch 27/100, Loss: 11.8389, Train Acc: 0.8833, Val Acc: 0.9147, IoU: 0.8008, Dice: 0.8299
42
+ Epoch 28/100, Loss: 11.0536, Train Acc: 0.8832, Val Acc: 0.9105, IoU: 0.7930, Dice: 0.8229
43
+ Epoch 29/100, Loss: 10.6666, Train Acc: 0.8847, Val Acc: 0.9156, IoU: 0.8024, Dice: 0.8313
44
+ Epoch 30/100, Loss: 10.2207, Train Acc: 0.8834, Val Acc: 0.9165, IoU: 0.8042, Dice: 0.8331
45
+ Epoch 31/100, Loss: 12.5192, Train Acc: 0.8848, Val Acc: 0.9535, IoU: 0.8794, Dice: 0.8996
46
+ Epoch 32/100, Loss: 10.1232, Train Acc: 0.8844, Val Acc: 0.9356, IoU: 0.8412, Dice: 0.8660
47
+
48
+ ⛔ Early stopping triggered at epoch 32
49
+ Completing training on: 2025-07-22 19:30:39.892257
50
+ Total training execution time = 9:36:41.781189
51
+
52
+ Training Summary:
53
+ Min Loss: 10.1232
54
+ Max Loss: 318.6268
55
+ Loss final: 10.1232
56
+ Best Val Acc: 0.9918
requirements.txt ADDED
@@ -0,0 +1,26 @@
1
+ #albumentations==1.4.14
2
+ albumentations==2.0.8
3
+ #datasets==2.18.0
4
+ datasets==3.6.0
5
+ gradio==5.38.2
6
+ ImageHash==4.3.2
7
+ matplotlib==3.10.3
8
+ #numpy==1.21.5
+ numpy==1.26.4
9
+ #opencv_contrib_python==4.6.0.66
10
+ opencv_contrib_python==4.9.0.80
11
+ opencv_contrib_python_headless==4.9.0.80
12
+ #opencv_python==4.6.0.66
13
+ opencv_python==4.9.0.80
14
+ opencv_python_headless==4.10.0.84
15
+ pandas==2.0.3
16
+ #Pillow==9.0.1
17
+ Pillow==11.3.0
18
+ scikit_learn==1.4.0
19
+ simple_image_download==0.2
20
+ torch==2.3.1
21
+ #torch==2.1.2
22
+ #torch==1.11.0
23
+ torchvision==0.18.1
24
+ #torchvision==0.16.2
25
+ #torchvision==0.12.0
26
+ warmup_scheduler==0.3
run_app.py ADDED
@@ -0,0 +1,9 @@
1
+ import os
2
+ import sys
3
+
4
+ sys.path.append(os.path.abspath(os.path.dirname(__file__)))
5
+
6
+ from scripts.Segmentation import app
7
+
8
+ if __name__ == '__main__':
9
+ app.demo.launch(share=True)
run_evaluate.py ADDED
@@ -0,0 +1,9 @@
1
+ import os
2
+ import sys
3
+
4
+ sys.path.append(os.path.abspath(os.path.dirname(__file__)))
5
+
6
+ from scripts.Segmentation import evaluate_model
7
+
8
+ if __name__ == '__main__':
9
+ evaluate_model.run()
scripts/Dataset/ConvertFormat.py ADDED
@@ -0,0 +1,32 @@
1
+ #encoding=utf8
2
+ import os
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ target = config.masks
7
+ extension_destination = '.png'
8
+
9
+ print('Starting processing...')
10
+ files = os.listdir(target)
11
+ for fileOne in files:
12
+ nameFile, extension = os.path.splitext(fileOne)
13
+ print('Testing: ', fileOne)
14
+ if((extension != extension_destination) and
15
+ (extension != '.json')):
16
+ if(extension == ''):
17
+ extension = '....'
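+ # '....' is a sentinel for files with no extension: the replace below is a no-op, so the target extension is simply appended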
18
+
19
+ print('\nRenaming: ' + str(fileOne))
20
+ origem = os.path.join(target, fileOne)
21
+
22
+ destino = fileOne.replace(extension, extension_destination)
23
+ if(extension == '....'):
24
+ destino = destino + extension_destination
25
+ destino = os.path.join(target, destino)
26
+
27
+ im = Image.open(origem).convert('RGB')
28
+ im.save(destino, 'PNG')
29
+ os.remove(origem)
30
+ print('-----------------------------------------')
31
+
32
+ print('\n\nCompleted...')
scripts/Dataset/Rename.py ADDED
@@ -0,0 +1,22 @@
1
+ #encoding=utf8
2
+ import os
3
+ import scripts.config as config
4
+
5
+ extension = '.jpg'
6
+
7
+ print('Starting processing...')
8
+ contador = 0
9
+ paths = [os.path.join(config.images, nome) for nome in os.listdir(config.images)]
10
+ files = [arq for arq in paths if os.path.isfile(arq)]
11
+ jpgs = [arq for arq in files if arq.lower().endswith(extension)]
12
+ for img in jpgs:
13
+ imagePath = str(img)
14
+
15
+ oldName = imagePath.replace(config.images, '')
16
+ print('\n ' + oldName)
17
+
18
+ newName = config.images + str(contador) + extension
19
+ os.rename(imagePath, newName)
20
+ contador = contador + 1
21
+
22
+ print('\n\nCompleted...')
scripts/Dataset/Resize.py ADDED
@@ -0,0 +1,25 @@
1
+ #encoding=utf8
2
+ import os
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ def resize_all(target, extensao):
7
+ paths = [os.path.join(target, nome) for nome in os.listdir(target)]
8
+ files = [arq for arq in paths if os.path.isfile(arq)]
9
+ files = [arq for arq in files if arq.lower().endswith(extensao)]
10
+ for img in files:
11
+ imagePath = str(img)
12
+ print(' ' + imagePath)
13
+ try:
14
+ image = Image.open(imagePath)
15
+ resized = image.resize((config.width, config.height), Image.LANCZOS)
16
+ resized.save(imagePath)
17
+ except Exception as e:
18
+ print(f" {imagePath}: {e}")
19
+
20
+ print('Starting processing...')
21
+
22
+ resize_all(config.images, '.jpg')
23
+ resize_all(config.masks, '.png')
24
+
25
+ print('\n\nCompleted...')
scripts/Dataset/TrainVal.py ADDED
@@ -0,0 +1,35 @@
1
+ #encoding=utf8
2
+ import os
3
+ import pandas as pd
4
+ import scripts.config as config
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ nameTrain = config.source + 'train.txt'
8
+ nameVal = config.source + 'val.txt'
9
+ percentual = 0.7
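+ # 70/30 train/validation split; each output line is a tab-separated '<image path>\t<mask path>' pair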
10
+
11
+ def nomeOrigem(originalName):
12
+ nome = os.path.join(config.images, originalName)
13
+ return nome
14
+
15
+ def nomeMascara(originalName):
16
+ nome = os.path.join(config.masks, originalName)
17
+ nome = nome.replace('.jpg', '.png')
18
+ return nome
19
+
20
+ print('Starting processing...')
21
+ paths = [os.path.join(config.images, nome) for nome in os.listdir(config.images)]
22
+ files = [arq for arq in paths if os.path.isfile(arq)]
23
+ files = [arq for arq in files if arq.lower().endswith('.jpg')]
24
+
25
+ df = pd.DataFrame({'nome_original': files})
26
+
27
+ df['newNameA'] = [nomeOrigem(nome) for i, nome in enumerate(files)]
28
+ df['newNameB'] = [nomeMascara(nome) for i, nome in enumerate(files)]
29
+
30
+ grupo1, grupo2 = train_test_split(df, train_size=percentual, random_state=42)
31
+
32
+ grupo1[['newNameA', 'newNameB']].to_csv(nameTrain, sep='\t', index=False, header=False)
33
+ grupo2[['newNameA', 'newNameB']].to_csv(nameVal, sep='\t', index=False, header=False)
34
+
35
+ print("Groups saved in '" + nameTrain + "' and '" + nameVal + "'")
scripts/Dataset/__init__.py ADDED
File without changes
scripts/Dataset/dataAugmentation.py ADDED
@@ -0,0 +1,48 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ import albumentations as A
6
+ import scripts.config as config
7
+
8
+ input_img_dir = config.images
9
+ out_img_dir = config.images #+ "/images_aug"
10
+
11
+ input_mask_dir = config.masks
12
+ out_mask_dir = config.masks #+ "/masks_aug"
13
+
14
+ print('Starting...')
15
+
16
+ os.makedirs(out_img_dir, exist_ok=True)
17
+ os.makedirs(out_mask_dir, exist_ok=True)
18
+
19
+ transformations = [
20
+ ("flip", A.HorizontalFlip(p=1.0)),
21
+ ("rot15", A.Rotate(limit=15, p=1.0)),
22
+ ("contrast", A.RandomBrightnessContrast(p=1.0)),
23
+ ]
24
+
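+ # each image/mask pair gets one deterministic copy per transformation (flip, rot15, contrast), saved alongside the originals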
25
+ for fname in os.listdir(input_img_dir):
26
+ if not fname.endswith(".jpg"): continue
27
+ base = fname.replace(".jpg", "")
28
+
29
+ img_path = os.path.join(input_img_dir, fname)
30
+ mask_path = os.path.join(input_mask_dir, base + "_mask.png")
31
+
32
+ img = np.array(Image.open(img_path).convert("L"))
33
+ mask = np.array(Image.open(mask_path).convert("L"))
34
+ mask = (mask > 127).astype('uint8')
35
+
36
+ for name, tf in transformations:
37
+ aug = A.Compose([tf])
38
+ augmented = aug(image=img, mask=mask)
39
+ img_aug = augmented['image']
40
+ mask_aug = augmented['mask']
41
+
42
+ img_out = os.path.join(out_img_dir, f"{base}_aug_{name}.jpg")
43
+ mask_out = os.path.join(out_mask_dir, f"{base}_aug_{name}_mask.png")
44
+
45
+ cv2.imwrite(img_out, img_aug)
46
+ cv2.imwrite(mask_out, mask_aug * 255)
47
+
48
+ print('Completed...')
scripts/Dataset/deleteDuplicates.py ADDED
@@ -0,0 +1,32 @@
1
+ import os
2
+ import imagehash
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ hashes = {}
7
+ duplicates = []
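+ # perceptual hashing: visually identical images map to the same phash, so later occurrences are flagged as duplicates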
8
+
9
+ paths = [os.path.join(config.images, nome) for nome in os.listdir(config.images)]
10
+ files = [arq for arq in paths if os.path.isfile(arq)]
11
+ fileNames = [arq for arq in files if arq.lower().endswith('.jpg')]
12
+ for filename in fileNames:
13
+ filepath = filename
14
+ try:
15
+ with Image.open(filepath) as img:
16
+ img_hash = imagehash.phash(img) # Or: dhash(img), average_hash(img)
17
+ if img_hash in hashes:
18
+ print(f"Duplicate detected: {filename} ≈ {hashes[img_hash]}")
19
+ duplicates.append(filepath)
20
+ else:
21
+ hashes[img_hash] = filename
22
+ except Exception as e:
23
+ print(f"Error processing {filename}: {e}")
24
+
25
+ for dup in duplicates:
26
+ try:
27
+ os.remove(dup)
28
+ print(f"Removed: {dup}")
29
+ except Exception as e:
30
+ print(f"Error removing {dup}: {e}")
31
+
32
+ print(f"\nTotal duplicates removed: {len(duplicates)}")
scripts/Dataset/getDS_HuggingFace.py ADDED
@@ -0,0 +1,17 @@
 
1
+ import os
2
+ from datasets import load_dataset
3
+
4
+ dataset_name = 'Onegafer/vehicle_segmentation'
5
+
6
+ split = "train" # or "test", "validation", etc.
7
+ print(f"🔽 Downloading dataset: {dataset_name}...")
8
+ dataset = load_dataset(dataset_name, split=split)
9
+
10
+ print(f"✅ Dataset loaded with {len(dataset)} samples.")
11
+ print("Example:", dataset[0])
12
+
13
+ output_dir = '/home/pi/Deposito/Projetos/Meus/CertificacaoHuggingFace/fontes/TempDataSet/'
14
+ os.makedirs(output_dir, exist_ok=True)
15
+ dataset.save_to_disk(os.path.join(output_dir, dataset_name.replace("/", "_")))
16
+
17
+ print(f"💾 Dataset saved to: {output_dir}/{dataset_name.replace('/', '_')}")
scripts/Dataset/getImages.py ADDED
@@ -0,0 +1,17 @@
1
+ import os
2
+ import scripts.config as config
3
+ from simple_image_download import simple_image_download as simp
4
+
5
+ termo_busca = "garrote"
6
+ pasta_destino = config.tempImages
7
+ quantidade = 200
8
+
9
+
10
+ if not os.path.exists(pasta_destino):
11
+ os.makedirs(pasta_destino)
12
+
13
+ response = simp.simple_image_download
14
+ response().download(termo_busca, quantidade)
15
+
16
+
17
+ print(f"Download complete! Images saved to: {pasta_destino}")
scripts/Dataset/grays.py ADDED
@@ -0,0 +1,23 @@
1
+ #encoding=utf8
2
+ import os
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ def convert_all(target, extensao):
7
+ paths = [os.path.join(target, nome) for nome in os.listdir(target)]
8
+ files = [arq for arq in paths if os.path.isfile(arq)]
9
+ arquivos = [arq for arq in files if arq.lower().endswith(extensao)]
10
+ for img in arquivos:
11
+ pathImage = str(img)
12
+ print(' ' + pathImage)
13
+
14
+ image = Image.open(pathImage)
15
+ image = image.convert("L")
16
+ image.save(pathImage)
17
+
18
+ print('Starting processing...')
19
+
20
+ convert_all(config.images, '.jpg')
21
+ convert_all(config.masks, '.png')
22
+
23
+ print('\n\nCompleted...')
scripts/Dataset/mask_diagnosis.py ADDED
@@ -0,0 +1,8 @@
1
+ import numpy as np
2
+ from PIL import Image
3
+
4
+ path = '/home/pi/Deposito/Projetos/Meus/CertificacaoHuggingFace/fontes/'
5
+ path = path + 'DataSet/masks/0cdf5b5d0ce1_01_mask.png'
6
+
7
+ mask = Image.open(path).convert("L")
8
+ print(np.unique(np.array(mask)))
scripts/Dataset/masks.py ADDED
@@ -0,0 +1,47 @@
1
+ import os
2
+ import cv2
3
+ import json
4
+ import numpy as np
5
+ from PIL import Image
6
+ import scripts.config as config
7
+
8
+ print('Starting processing...')
9
+ os.makedirs(config.masks, exist_ok=True)
10
+ for nameFile in os.listdir(config.images):
11
+ if nameFile.endswith('.json'):
12
+ jsonPath = os.path.join(config.images, nameFile)
13
+
14
+ dados = ''
15
+ with open(jsonPath, 'r', encoding='utf-8') as f:
16
+ dados = json.load(f)
17
+
18
+ baseName = os.path.splitext(nameFile)[0]
19
+ imagePath = os.path.join(config.images, baseName + '.jpg')
20
+
21
+ image = cv2.imread(imagePath)
22
+ if image is None:
23
+ print(f"Image {imagePath} not found or invalid.")
24
+ continue
25
+
26
+ height, width = image.shape[:2]
27
+ imgShape = (height, width)
28
+
29
+ contador = 0
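+ # rasterize every polygon annotation labelled 'gado' into its own binary mask image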
30
+ for shape in dados.get('shapes', []):
31
+ if shape.get('label') == 'gado' and shape.get('shape_type') == 'polygon':
32
+ mask = np.zeros(imgShape, dtype=np.uint8)
33
+ pts = np.array(shape['points'], dtype=np.int32)
34
+ cv2.fillPoly(mask, [pts], color=1)
35
+
36
+ imgMask = (mask * 255).astype(np.uint8)
37
+ imgMask = Image.fromarray(imgMask)
38
+
39
+ maskName = f"{baseName}_{contador}.png" if contador > 0 else f"{baseName}.png" # suffix later polygons so they do not overwrite the first
40
+ endPath = os.path.join(config.masks, maskName)
41
+
42
+ imgMask.save(endPath)
43
+ print(f"Saved: {endPath}")
44
+
45
+ contador += 1
46
+
47
+ print('\n\nCompleted...')
scripts/Dataset/validMasks.py ADDED
@@ -0,0 +1,45 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ import scripts.config as config
6
+ import matplotlib.pyplot as plt
7
+
8
+ print('Starting processing...')
9
+ paths = [os.path.join(config.masks, nome) for nome in os.listdir(config.masks)]
10
+ files = [arq for arq in paths if os.path.isfile(arq)]
11
+ masks = [arq for arq in files if arq.lower().endswith('.png')]
12
+ for maskName in masks:
13
+
14
+ imgName = str(maskName)
15
+ imgName = imgName.replace(config.masks, '')
16
+ # dataset masks follow the '<image>_mask.png' convention, so strip the suffix to recover the image name
17
+ imgName = imgName.replace('_mask.png', '.jpg')
18
+ if imgName.endswith('.png'):
19
+ imgName = imgName.replace('.png', '.jpg')
20
+
21
+ print('imgName = ', str(imgName).replace(config.images, ''))
22
+ print('maskName = ', str(maskName).replace(config.masks, ''))
23
+
24
+ imgPath = os.path.join(config.images, imgName)
25
+ img = Image.open(imgPath).convert("L").resize((config.height, config.width))
26
+ img_np = np.array(img)
27
+
28
+ maskPath = os.path.join(config.masks, maskName)
29
+ mask = Image.open(maskPath).resize((config.height, config.width)).convert("L")
30
+ mask_np = np.array(mask) // 255
31
+
32
+ overlay = cv2.cvtColor(img_np, cv2.COLOR_GRAY2BGR)
33
+ overlay[mask_np == 1] = [255, 0, 0]
34
+
35
+ output = cv2.addWeighted(cv2.cvtColor(img_np, cv2.COLOR_GRAY2BGR), 0.7, overlay, 0.3, 0)
36
+
37
+ plt.figure(figsize=(6, 6))
38
+ plt.imshow(output)
39
+ plt.title(f"Overlay")
40
+ plt.axis("off")
41
+ plt.show()
42
+
43
+ input("Press ENTER to view the next...")
44
+
45
+ print('\n\nCompleted...')
scripts/Segmentation/Future/__init__.py ADDED
File without changes
scripts/Segmentation/Future/cyber_train.py ADDED
@@ -0,0 +1,299 @@
1
+ import os
2
+ import torch
3
+ import datetime
4
+ import numpy as np
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import matplotlib.pyplot as plt
8
+ import scripts.config as config
9
+ from torch.utils.data import DataLoader
10
+ from sklearn.metrics import jaccard_score
11
+ from torch.utils.data import random_split
12
+ import torchvision.transforms as transforms
13
+ import scripts.Segmentation.augment as augment
14
+ from scripts.Segmentation.models import ResNetUNet
15
+ from warmup_scheduler import GradualWarmupScheduler
16
+ from scripts.Segmentation.focalLoss import FocalLoss
17
+ from scripts.Segmentation.segDS import SegmentationDataset
18
+ from scripts.Segmentation.diceLossCriterion import DiceLoss
19
+
20
+ def save_report(row):
21
+ print(str(row))
22
+ with open(config.report_file, 'a', encoding='utf-8') as f:
23
+ f.write(str(row) + '\n')
24
+
25
+ def compute_class_weights(dataset):
26
+ class_counts = torch.zeros(2)
27
+ for _, mask in dataset:
28
+ pixels = mask.view(-1)
29
+ for c in [0, 1]:
30
+ class_counts[c] += (pixels == c).sum()
31
+ weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
32
+ weights = weights / weights.sum()
33
+ return weights
34
+
35
+ def auto_detect_hyperparams(model, train_loader, val_loader):
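+ # lightweight search: each candidate config is trained on a few batches and scored on a few validation batches (val accuracy minus val loss); the best-scoring config is used for the full run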
36
+ candidate_configs = [
37
+ {"lr": 1e-3, "wd": 1e-5, "use_focal": False, "dice_weight": 1.0},
38
+ {"lr": 1e-4, "wd": 1e-4, "use_focal": True, "dice_weight": 1.5},
39
+ {"lr": 5e-4, "wd": 1e-5, "use_focal": False, "dice_weight": 2.0},
40
+ {"lr": 1e-4, "wd": 1e-6, "use_focal": True, "dice_weight": 1.0},
41
+ ]
42
+
43
+ best_score = -float('inf')
44
+ best_config = candidate_configs[0]
45
+
46
+ for cfg in candidate_configs:
47
+ temp_model = ResNetUNet(num_classes=2).to(config.device)
48
+ optimizer = optim.AdamW(temp_model.parameters(), lr=cfg["lr"], weight_decay=cfg["wd"])
49
+ criterion = FocalLoss(gamma=2.0) if cfg["use_focal"] else nn.CrossEntropyLoss()
50
+ dice_loss = DiceLoss()
51
+ temp_model.train()
52
+ for i, (images, masks) in enumerate(train_loader):
53
+ if i > 2: break
54
+ images = images.to(config.device).float()
55
+ masks = masks.to(config.device).long()
56
+ outputs = temp_model(images)
57
+ loss = cfg["dice_weight"] * dice_loss(outputs, masks) + 0.5 * criterion(outputs, masks)
58
+ optimizer.zero_grad()
59
+ loss.backward()
60
+ optimizer.step()
61
+
62
+ temp_model.eval()
63
+ val_acc = 0
64
+ val_loss = 0
65
+ val_batches = 0
66
+ #with torch.no_grad():
67
+ for images, masks in val_loader:
68
+ images = images.to(config.device).float()
69
+ masks = masks.to(config.device).long()
70
+ outputs = temp_model(images)
71
+ preds = torch.argmax(outputs, dim=1)
72
+ acc = (preds == masks).sum().item() / torch.numel(masks)
73
+ val_acc += acc
74
+ loss = cfg["dice_weight"] * dice_loss(outputs, masks) + 0.5 * criterion(outputs, masks)
75
+ val_loss += loss.item()
76
+ val_batches += 1
77
+ if val_batches > 2: break
78
+
79
+ avg_score = val_acc / val_batches - val_loss / val_batches
80
+ if avg_score > best_score:
81
+ best_score = avg_score
82
+ best_config = cfg
83
+
84
+ return best_config
85
+
86
+ os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
87
+ try:
88
+ save_report(torch.__version__)
89
+ save_report(torch.version.cuda)
90
+ save_report(torch.cuda.get_arch_list()) #['sm_75', 'sm_86']
91
+ except Exception as e:
92
+ save_report(e)
93
+ pass
94
+
95
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
96
+ print('Starting processing...')
97
+
98
+ os.makedirs(config.checkpoints, exist_ok=True)
99
+
100
+ transform = transforms.Compose([
101
+ transforms.Resize((config.height, config.width)),
102
+ transforms.ToTensor(),
103
+ transforms.Normalize(mean=[0.5], std=[0.5])
104
+ ])
105
+
106
+ dataset = SegmentationDataset(transform = transform)
107
+
108
+ train_size = int(0.8 * len(dataset))
109
+ val_size = len(dataset) - train_size
110
+ train_ds, val_ds = random_split(dataset, [train_size, val_size])
111
+
112
+ train_loader = DataLoader(train_ds, batch_size=4, shuffle=True)
113
+ val_loader = DataLoader(val_ds, batch_size=4, shuffle=False)
114
+
115
+ model = ResNetUNet(num_classes=2).to(config.device)
116
+
117
+ class_weights = compute_class_weights(dataset).to(config.device)
118
+
119
+
120
+ selected = auto_detect_hyperparams(model, train_loader, val_loader)
121
+ save_report(f"\n🔍 Auto-selected hyperparams: {selected}")
122
+
123
+ criterion = FocalLoss(gamma=2.0) if selected["use_focal"] else nn.CrossEntropyLoss(weight=class_weights)
124
+ optimizer = optim.AdamW(model.parameters(), lr=selected["lr"], weight_decay=selected["wd"])
125
+ dice_loss = DiceLoss()
126
+
127
+
128
+ cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.num_epochs)
129
+ scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0, total_epoch=5, after_scheduler=cosine_scheduler)
130
+
131
+ dataHoraInicial = datetime.datetime.now()
132
+ save_report('\n\n\nStarting training on: ' + str(dataHoraInicial))
133
+
134
+ accuracies = []
135
+ iou_history = []
136
+ loss_history = []
137
+ dice_history = []
138
+ val_accuracies = []
139
+ best_accuracy = 0.0
140
+ dice_loss = DiceLoss()
141
+ epochs_no_improve = 0
142
+
143
+ model.train()
144
+ for epoch in range(config.num_epochs):
145
+ model.train()  # re-enable training mode (validation below switches the model to eval)
+ total_loss = 0
146
+ correct_pixels = 0
147
+ total_pixels = 0
148
+
149
+ for images, masks in train_loader:
150
+ images = images.to(config.device).float()
151
+ masks = masks.to(config.device).long()
152
+
153
+ output = []
154
+
155
+ if config.USE_TTA:
156
+ for img in images:
157
+ preds = augment.predict_with_tta(model, img.unsqueeze(0)) # [1, C, H, W]
158
+ output.append(preds)
159
+ output = torch.cat(output, dim=0) # [B, C, H, W]
160
+ else:
161
+ output = model(images)
162
+
163
+ output = output.float()
164
+ masks = masks.long()
165
+
166
+ loss = selected["dice_weight"] * dice_loss(output, masks) + 0.5 * criterion(output, masks)
167
+
168
+ optimizer.zero_grad()
169
+ loss.backward()
170
+ optimizer.step()
171
+ scheduler.step()
172
+
173
+ preds = torch.argmax(output, dim=1)
174
+
175
+ if config.USE_REFINEMENT:
176
+ preds = augment.refine_mask(preds)
177
+
178
+ correct_pixels += (preds == masks).sum().item()
179
+ total_pixels += torch.numel(preds)
180
+ total_loss += loss.item()
181
+
182
+ loss_history.append(total_loss)
183
+ epoch_accuracy = correct_pixels / total_pixels
184
+ accuracies.append(epoch_accuracy)
185
+
186
+ model.eval()
187
+ val_correct = 0
188
+ val_total = 0
189
+ val_preds_all = []
190
+ val_targets_all = []
191
+ for val_images, val_masks in val_loader:
192
+ val_images = val_images.to(config.device).float()
193
+ val_masks = val_masks.to(config.device).long()
194
+
195
+ val_outputs = model(val_images)
196
+ val_preds = torch.argmax(val_outputs, dim=1)
197
+
198
+ val_preds_all.append(val_preds.view(-1).cpu().numpy())
199
+ val_targets_all.append(val_masks.view(-1).cpu().numpy())
200
+
201
+ val_correct += (val_preds == val_masks).sum().item()
202
+ val_total += torch.numel(val_preds)
203
+
204
+ val_accuracy = val_correct / val_total
205
+ val_accuracies.append(val_accuracy)
206
+
207
+ val_preds_flat = np.concatenate(val_preds_all)
208
+ val_targets_flat = np.concatenate(val_targets_all)
209
+
210
+ #iou = jaccard_score(val_targets_flat, val_preds_flat, average='binary')
211
+ iou = jaccard_score(val_targets_flat, val_preds_flat, average='macro') # or 'weighted'
212
+
213
+ intersection = np.logical_and(val_preds_flat, val_targets_flat).sum()
214
+ union = np.logical_or(val_preds_flat, val_targets_flat).sum()
215
+ dice = (2 * intersection) / (val_preds_flat.sum() + val_targets_flat.sum() + 1e-6)
216
+ iou_history.append(iou)
217
+ dice_history.append(dice)
218
+
219
+ save_report(f"Epoch {epoch+1}/{config.num_epochs}, Loss: {total_loss:.4f}, "
220
+ f"Train Acc: {epoch_accuracy:.4f}, Val Acc: {val_accuracy:.4f}, "
221
+ f"IoU: {iou:.4f}, Dice: {dice:.4f}")
222
+
223
+
224
+ if (epoch + 1) % config.checkpoint_interval == 0:
225
+ checkpoint_path = os.path.join(config.checkpoints, f"checkpoint_epoch_{epoch+1}.pt")
226
+ torch.save({
227
+ 'epoch': epoch + 1,
228
+ 'model_state_dict': model.state_dict(),
229
+ 'optimizer_state_dict': optimizer.state_dict(),
230
+ 'loss': total_loss,
231
+ 'accuracy': epoch_accuracy,
232
+ }, checkpoint_path)
233
+
234
+ if epoch_accuracy > best_accuracy:
235
+ save_report(f"🔸 New best model at epoch {epoch+1} (acc: {epoch_accuracy:.4f}) — saving best_model.pt")
236
+ best_accuracy = epoch_accuracy
237
+ torch.save(model.state_dict(), os.path.join(config.checkpoints, "best_model.pt"))
238
+ epochs_no_improve = 0
239
+ else:
240
+ epochs_no_improve += 1
241
+
242
+ if epochs_no_improve >= config.early_stop_patience:
243
+ save_report(f"\n⛔ Early stopping triggered at epoch {epoch+1}")
244
+ break
245
+
246
+ dataHoraFinal = datetime.datetime.now()
247
+ save_report('Completing training on: ' + str(dataHoraFinal))
248
+ save_report('Total training execution time = ' + str((dataHoraFinal - dataHoraInicial)))
249
+
250
+ model.eval()
251
+
252
+ torch.save(model, config.modelName)
253
+
254
+ try:
255
+ plt.figure(figsize=(8, 5))
256
+ plt.plot(range(1, len(loss_history)+1), loss_history, marker='o')
257
+ plt.title("Loss Evolution")
258
+ plt.xlabel("Epochs")
259
+ plt.ylabel("Loss")
260
+ plt.grid()
261
+ plt.tight_layout()
262
+ plt.savefig(config.source + 'training_loss.png')
263
+
264
+ plt.figure(figsize=(8, 5))
265
+ plt.plot(range(1, len(val_accuracies)+1), val_accuracies, marker='o', color='green')
266
+ plt.title("Validation Accuracy")
267
+ plt.xlabel("Epochs")
268
+ plt.ylabel("Pixel Accuracy")
269
+ plt.grid()
270
+ plt.tight_layout()
271
+ plt.savefig(config.source + 'training_val_accuracy.png')
272
+
273
+ plt.figure(figsize=(8, 5))
274
+ plt.plot(range(1, len(iou_history)+1), iou_history, marker='o', color='purple')
275
+ plt.title("IoU Evolution")
276
+ plt.xlabel("Epochs")
277
+ plt.ylabel("IoU Score")
278
+ plt.grid()
279
+ plt.tight_layout()
280
+ plt.savefig(config.source + 'iou_history.png')
281
+
282
+ plt.figure(figsize=(8, 5))
283
+ plt.plot(range(1, len(dice_history)+1), dice_history, marker='o', color='orange')
284
+ plt.title("Dice Score Evolution")
285
+ plt.xlabel("Epochs")
286
+ plt.ylabel("Dice Score")
287
+ plt.grid()
288
+ plt.tight_layout()
289
+ plt.savefig(config.source + 'dice_history.png')
290
+
291
+ except Exception as e:
292
+ pass
293
+
294
+ save_report("\nTraining Summary:")
295
+ save_report(f" Min Loss: {min(loss_history):.4f}")
296
+ save_report(f" Max Loss: {max(loss_history):.4f}")
297
+ save_report(f" Loss final: {loss_history[-1]:.4f}")
298
+ save_report(f" Best Val Acc: {max(val_accuracies):.4f}")
299
+ print("\nCompleted ✅")
scripts/Segmentation/Future/train_embedded_explicit_model.py ADDED
@@ -0,0 +1,126 @@
1
+ # NOTE: This script is a planned extension for embedded deployment.
2
+ # It has not been tested or benchmarked in this certification phase.
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import scripts.config as config
8
+ from torchvision import transforms
9
+ from torch.utils.data import DataLoader
10
+ from scripts.Segmentation.segDS import SegmentationDataset
11
+
12
+ # ----------------------
13
+ # Compact U-Net-like model defined explicitly
14
+ # ----------------------
15
+ class EmbeddedUNet(nn.Module):
16
+ def __init__(self):
17
+ super().__init__()
18
+
19
+ # Encoder
20
+ self.enc1 = nn.Sequential(
21
+ nn.Conv2d(1, 8, 3, padding=1),
22
+ nn.ReLU(),
23
+ nn.MaxPool2d(2) # 128 -> 64
24
+ )
25
+ self.enc2 = nn.Sequential(
26
+ nn.Conv2d(8, 16, 3, padding=1),
27
+ nn.ReLU(),
28
+ nn.MaxPool2d(2) # 64 -> 32
29
+ )
30
+
31
+ # Bottleneck
32
+ self.bottleneck = nn.Sequential(
33
+ nn.Conv2d(16, 32, 3, padding=1),
34
+ nn.ReLU()
35
+ )
36
+
37
+ # Decoder
38
+ self.up1 = nn.Upsample(scale_factor=2, mode='nearest')
39
+ self.dec1 = nn.Sequential(
40
+ nn.Conv2d(32, 16, 3, padding=1),
41
+ nn.ReLU()
42
+ )
43
+ self.up2 = nn.Upsample(scale_factor=2, mode='nearest')
44
+ self.dec2 = nn.Sequential(
45
+ nn.Conv2d(16, 8, 3, padding=1),
46
+ nn.ReLU()
47
+ )
48
+
49
+ # Output
50
+ self.out = nn.Conv2d(8, 2, kernel_size=1) # 2 classes: background + object
51
+
52
+ def forward(self, x):
53
+ x1 = self.enc1(x)
54
+ x2 = self.enc2(x1)
55
+ x = self.bottleneck(x2)
56
+ x = self.up1(x)
57
+ x = self.dec1(x)
58
+ x = self.up2(x)
59
+ x = self.dec2(x)
60
+ return self.out(x)
61
+
62
+ # ----------------------
63
+ # Configuration
64
+ # ----------------------
65
+ IMG_SIZE = (128, 128)
66
+ BATCH_SIZE = 16
67
+ EPOCHS = 20
68
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
69
+
70
+ # ----------------------
71
+ # Data Loading
72
+ # ----------------------
73
+ transform = transforms.Compose([
74
+ transforms.Grayscale(),
75
+ transforms.Resize(IMG_SIZE),
76
+ transforms.ToTensor()
77
+ ])
78
+
79
+ # SegmentationDataset reads its image/mask directories from scripts.config,
+ # so only the transform is passed here
+ train_ds = SegmentationDataset(transform=transform)
82
+ train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
83
+
84
+ # ----------------------
85
+ # Training
86
+ # ----------------------
87
+ model = EmbeddedUNet().to(DEVICE)
88
+ criterion = nn.CrossEntropyLoss()
89
+ optimizer = optim.Adam(model.parameters(), lr=1e-3)
90
+
91
+ for epoch in range(EPOCHS):
92
+ model.train()
93
+ total_loss = 0.0
94
+ for imgs, masks in train_dl:
95
+ imgs, masks = imgs.to(DEVICE), masks.long().to(DEVICE)
96
+ optimizer.zero_grad()
97
+ outputs = model(imgs)
98
+ loss = criterion(outputs, masks)
99
+ loss.backward()
100
+ optimizer.step()
101
+ total_loss += loss.item()
102
+
103
+ print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {total_loss:.4f}")
104
+
105
+ # ----------------------
106
+ # Save weights only
107
+ # ----------------------
108
+ torch.save(model.state_dict(), 'embedded_model_weights.pth')
109
+
110
+ # ----------------------
111
+ # Export to ONNX (float16)
112
+ # ----------------------
113
+ model.eval()
114
+ dummy_input = torch.randn(1, 1, *IMG_SIZE).to(DEVICE)
115
+ torch.onnx.export(
116
+ model.half(), # Convert model to float16
117
+ dummy_input.half(), # Dummy input in float16
118
+ "embedded_model_fp16.onnx",
119
+ input_names=["input"],
120
+ output_names=["output"],
121
+ opset_version=12,
122
+ do_constant_folding=True,
123
+ dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}}
124
+ )
125
+
126
+ print("✅ Model exported as embedded_model_fp16.onnx")
scripts/Segmentation/__init__.py ADDED
File without changes
scripts/Segmentation/app.py ADDED
@@ -0,0 +1,59 @@
1
+ import os
2
+ import sys
3
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
4
+ import torch
5
+ import numpy as np
6
+ import gradio as gr
7
+ from PIL import Image
8
+ import scripts.config as config
9
+ import torchvision.transforms as transforms
10
+ import scripts.Segmentation.augment as augment
11
+ from scripts.Segmentation.models import ResNetUNet
12
+
13
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
14
+
15
+ modelo = '/best_model.pt'
16
+ model = ResNetUNet(num_classes=2).to(config.device)
17
+ model.load_state_dict(torch.load(config.checkpoints + modelo, map_location=config.device))
18
+ model.eval()
19
+
20
+ transform = transforms.Compose([
21
+ transforms.Grayscale(num_output_channels=1),
22
+ transforms.Resize((config.height, config.width)),
23
+ transforms.ToTensor(),
24
+ transforms.Normalize(mean=[0.5], std=[0.5])
25
+ ])
26
+
27
+ def segment_image(input_img):
28
+ input_img = input_img.convert('L')
29
+ img_tensor = transform(input_img).unsqueeze(0).to(config.device) # (1, 1, H, W)
30
+
31
+ output = None
32
+ if config.USE_TTA:
33
+ output = augment.predict_with_tta(model, img_tensor)
34
+ else:
35
+ output = model(img_tensor)
36
+
37
+ probs = torch.softmax(output, dim=1) # (1, 2, H, W)
38
+ mask = torch.argmax(probs, dim=1).squeeze(0)
39
+
40
+ if config.USE_REFINEMENT:
41
+ mask = augment.refine_mask(mask)
42
+
43
+ mask = mask.cpu().numpy()
44
+ mask_img = Image.fromarray((mask * 255).astype(np.uint8))
45
+ return input_img, mask_img
46
+
47
+ demo = gr.Interface(
48
+ fn=segment_image,
49
+ inputs=gr.Image(type="pil", label="Input image"),
50
+ outputs=[
51
+ gr.Image(type="pil", label="Original image"),
52
+ gr.Image(type="pil", label="Segmented mask"),
53
+ ],
54
+ title="ResNet-UNet Image Segmenter",
55
+ description="Upload an image and see the segmentation mask produced by the trained model."
56
+ )
57
+
58
+ if __name__ == "__main__":
59
+ demo.launch(share=True)
scripts/Segmentation/augment.py ADDED
@@ -0,0 +1,30 @@
1
+ import cv2
2
+ import torch
3
+ import numpy as np
4
+
5
+ def predict_with_tta(model, image):
6
+ # each test-time transform is paired with its inverse so predictions
+ # are mapped back to the original orientation before averaging
+ transforms = [
7
+ (lambda x: x, lambda x: x),
8
+ (lambda x: torch.flip(x, dims=[3]), lambda x: torch.flip(x, dims=[3])),
9
+ (lambda x: torch.rot90(x, 1, [2, 3]), lambda x: torch.rot90(x, -1, [2, 3]))
10
+ ]
11
+ predictions = []
12
+ for tf, inv in transforms:
13
+ aug = tf(image)
14
+ #with torch.no_grad():
15
+ pred = model(aug)
16
+ inv_pred = inv(pred)
17
+ #predictions.append(torch.softmax(inv_pred, dim=1))
18
+ predictions.append(inv_pred)
19
+
20
+ #avg_pred = torch.stack(predictions).mean(0)
21
+ #return torch.argmax(avg_pred, dim=1).squeeze(0)
22
+ avg_logits = torch.stack(predictions).mean(0) # [B, C, H, W]
23
+ return avg_logits
24
+
25
+ def refine_mask(mask_tensor):
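+ # morphological closing then opening with a 5x5 elliptical kernel: fills small holes and removes isolated specks in the predicted mask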
26
+ mask = mask_tensor.cpu().numpy().astype(np.uint8)
27
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
28
+ closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
29
+ opened = cv2.morphologyEx(closed, cv2.MORPH_OPEN, kernel)
30
+ return torch.from_numpy(opened).to(mask_tensor.device)
scripts/Segmentation/diceLossCriterion.py ADDED
@@ -0,0 +1,22 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ class DiceLoss(nn.Module):
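+ # soft Dice loss on the foreground channel: 1 - (2*intersection + smooth) / (|pred| + |target| + smooth)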
5
+
6
+ def __init__(self, smooth=1e-6):
7
+ super(DiceLoss, self).__init__()
8
+ self.smooth = smooth
9
+
10
+ def forward(self, logits, targets):
11
+ logits = logits.float() # [B, C, H, W]
12
+ probs = torch.softmax(logits, dim=1)
13
+ preds = probs[:, 1, :, :] # [B, H, W]
14
+
15
+ if targets.ndim == 4:
16
+ targets = targets.squeeze(1) # [B, H, W]
17
+ targets = (targets == 1).float() # binarize if necessary
18
+
19
+ intersection = (preds * targets).sum(dim=(1, 2))
20
+ union = preds.sum(dim=(1, 2)) + targets.sum(dim=(1, 2))
21
+ dice = (2 * intersection + self.smooth) / (union + self.smooth)
22
+ return 1 - dice.mean()
scripts/Segmentation/evaluate_model.py ADDED
@@ -0,0 +1,93 @@
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ import scripts.config as config
7
+ import matplotlib.pyplot as plt
8
+ import torchvision.transforms as transforms
9
+ import scripts.Segmentation.augment as augment
10
+ from scripts.Segmentation.models import ResNetUNet
11
+
12
+ def run():
13
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
14
+
15
+ modelo = "/best_model.pt"
16
+ model = ResNetUNet(num_classes=2)
17
+ model.load_state_dict(torch.load(config.checkpoints + modelo, map_location=config.device))
18
+
19
+ #modelo = "/modelo_completo.pth"
20
+ #model = torch.load(config.checkpoints + modelo, map_location=config.device) #full model
21
+
22
+ #modelo = "/checkpoint_epoch_20.pt"
23
+ #checkpoint = torch.load(config.checkpoints + modelo, map_location=config.device)
24
+ #model = ResNetUNet(num_classes=2)
25
+ #model.load_state_dict(checkpoint['model_state_dict'])
26
+
27
+ model.to(config.device)
28
+ model.eval()
29
+
30
+ transform = transforms.Compose([
31
+ transforms.Grayscale(num_output_channels=1),
32
+ transforms.Resize((config.height, config.width)),
33
+ transforms.ToTensor(),
34
+ transforms.Normalize(mean=[0.5], std=[0.5])
35
+ ])
36
+
37
+
38
+ eval_dir = config.extraTests
39
+ image_files = [f for f in os.listdir(eval_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
40
+
41
+ print(f"Found {len(image_files)} images for evaluation.")
42
+
43
+ for file_name in image_files:
44
+ img_path = os.path.join(eval_dir, file_name)
45
+ image = Image.open(img_path).convert("L") # Grayscale
46
+ input_tensor = transform(image).unsqueeze(0).to(config.device) # shape: [1, 1, H, W]
47
+
48
+ #with torch.no_grad():
49
+ if config.USE_TTA:
50
+ output = augment.predict_with_tta(model, input_tensor)
51
+ else:
52
+ output = model(input_tensor)
53
+
54
+ output = torch.softmax(output, dim=1)
55
+ output = torch.argmax(output, dim=1).squeeze(0)
56
+
57
+ if config.USE_REFINEMENT:
58
+ output = augment.refine_mask(output)
59
+
60
+ predicted_mask = output.cpu().numpy() # shape [H, W]
61
+ image_np = np.array(image.resize((config.width, config.height)), dtype=np.float32) / 255.0 # [H, W]
62
+
63
+ mask_overlay = np.zeros((config.height, config.width, 3), dtype=np.float32)
64
+ mask_overlay[..., 0] = predicted_mask # red channel where the mask == 1
65
+
66
+ image_rgb = np.stack([image_np]*3, axis=-1)
67
+
68
+ alpha = 0.4
69
+ blended = (1 - alpha) * image_rgb + alpha * mask_overlay
70
+
71
+ fig, axs = plt.subplots(1, 3, figsize=(15, 4))
72
+ axs[0].imshow(image_np, cmap='gray')
73
+ axs[0].set_title('Evaluated Image')
74
+ axs[0].axis('off')
75
+
76
+ axs[1].imshow(predicted_mask, cmap='jet')
77
+ axs[1].set_title('Predicted Mask')
78
+ axs[1].axis('off')
79
+
80
+ axs[2].imshow(blended)
81
+ axs[2].set_title('Overlay')
82
+ axs[2].axis('off')
83
+
84
+ plt.suptitle(f"Evaluating: {file_name}", fontsize=12)
85
+ plt.tight_layout()
86
+ plt.show()
87
+
88
+ input("Press ENTER to continue...")
89
+
90
+ print('\n\nCompleted...')
91
+
92
+ if __name__ == "__main__":
93
+ run()
scripts/Segmentation/focalLoss.py ADDED
@@ -0,0 +1,15 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ class FocalLoss(nn.Module):
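+ # focal-style weighting: cross-entropy scaled by (1 - p_t)**gamma, with p_t recovered from the mean CE (down-weights easy pixels)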
5
+
6
+ def __init__(self, gamma=2.0, weight=None):
7
+ super(FocalLoss, self).__init__()
8
+ self.gamma = gamma
9
+ self.ce = nn.CrossEntropyLoss(weight=weight)
10
+
11
+ def forward(self, logits, targets):
12
+ logits = logits.float()
13
+ ce_loss = self.ce(logits, targets)
14
+ pt = torch.exp(-ce_loss)
15
+ return ((1 - pt) ** self.gamma * ce_loss).mean()
scripts/Segmentation/models.py ADDED
@@ -0,0 +1,78 @@
 
1
+ import torch.nn as nn
2
+ import torch.nn.functional as F
3
+ import torchvision.models as models
4
+
5
+ class ResNetUNet(nn.Module):
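+ # ResNet-50 encoder (conv1 swapped for single-channel grayscale input) with a U-Net-style decoder; skip connections are summed into the upsampled features at each stage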
6
+
7
+ def __init__(self, num_classes=2):
8
+ super(ResNetUNet, self).__init__()
9
+ resnet = models.resnet50(pretrained=True)
10
+
11
+ resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
12
+ nn.init.kaiming_normal_(resnet.conv1.weight, mode='fan_out', nonlinearity='relu')
13
+
14
+ self.input_block = nn.Sequential(
15
+ resnet.conv1,
16
+ resnet.bn1,
17
+ resnet.relu
18
+ )
19
+ self.maxpool = resnet.maxpool
20
+
21
+ self.encoder1 = resnet.layer1 # 64→256
22
+ self.encoder2 = resnet.layer2 # 256→512
23
+ self.encoder3 = resnet.layer3 # 512→1024
24
+ self.bottleneck = resnet.layer4 # 1024→2048
25
+
26
+ self.up1 = nn.ConvTranspose2d(2048, 1024, kernel_size=2, stride=2)
27
+ self.up2 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
28
+ self.up3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
29
+ self.up4 = nn.ConvTranspose2d(256, 64, kernel_size=2, stride=2)
30
+ self.up5 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
31
+
32
+ self.conv1 = nn.Sequential(
33
+ nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
34
+ nn.BatchNorm2d(1024),
35
+ nn.ReLU()
36
+ )
37
+ self.conv2 = nn.Sequential(
38
+ nn.Conv2d(512, 512, kernel_size=3, padding=1),
39
+ nn.BatchNorm2d(512),
40
+ nn.ReLU()
41
+ )
42
+ self.conv3 = nn.Sequential(
43
+ nn.Conv2d(256, 256, kernel_size=3, padding=1),
44
+ nn.BatchNorm2d(256),
45
+ nn.ReLU()
46
+ )
47
+ self.conv4 = nn.Sequential(
48
+ nn.Conv2d(64, 64, kernel_size=3, padding=1),
49
+ nn.BatchNorm2d(64),
50
+ nn.ReLU()
51
+ )
52
+
53
+ self.out_conv = nn.Conv2d(64, num_classes, kernel_size=1)
54
+
55
+ def forward(self, x):
56
+ x0 = self.input_block(x)
57
+ x1 = self.maxpool(x0)
58
+ x2 = self.encoder1(x1)
59
+ x3 = self.encoder2(x2)
60
+ x4 = self.encoder3(x3)
61
+ x5 = self.bottleneck(x4)
62
+
63
+ d1 = F.relu(self.up1(x5) + x4)
64
+ d1 = self.conv1(d1)
65
+
66
+ d2 = F.relu(self.up2(d1) + x3)
67
+ d2 = self.conv2(d2)
68
+
69
+ d3 = F.relu(self.up3(d2) + x2)
70
+ d3 = self.conv3(d3)
71
+
72
+ d4 = F.relu(self.up4(d3) + x0)
73
+ d4 = self.conv4(d4)
74
+
75
+ d5 = self.up5(d4)
76
+ out = self.out_conv(d5)
77
+ return out
78
+
scripts/Segmentation/segDS.py ADDED
@@ -0,0 +1,42 @@
1
+ import os
2
+ import torch
3
+ import numpy as np
4
+ from PIL import Image
5
+ import scripts.config as config
6
+ from torch.utils.data import Dataset
7
+ import torchvision.transforms as transforms
8
+
9
+ class SegmentationDataset(Dataset):
10
+
11
+ def __init__(self, transform=None):
12
+ self.image_dir = config.images
13
+ self.mask_dir = config.masks
14
+ self.transform = transform
15
+ paths = [os.path.join(self.image_dir, f) for f in os.listdir(self.image_dir) if f.lower().endswith('.jpg')]
16
+ self.image_files = [os.path.basename(f) for f in paths]
17
+
18
+ def __len__(self):
19
+ return len(self.image_files)
20
+
21
+ def __getitem__(self, idx):
22
+ img_name = self.image_files[idx]
23
+ img_path = os.path.join(self.image_dir, img_name)
24
+ mask_path = os.path.join(self.mask_dir, img_name.replace('.jpg', '_mask.png'))
25
+ if not os.path.exists(mask_path):
26
+ raise FileNotFoundError(f"Mask not found for: {img_name}")
27
+
28
+ image = Image.open(img_path).convert("L")
29
+ mask = Image.open(mask_path).convert("L")
30
+
31
+ if self.transform:
32
+ image = self.transform(image)
33
+
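+ # masks are stored as 0/255 grayscale PNGs; threshold them to {0, 1} class indices for CrossEntropy/Dice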
34
+ mask = np.array(mask)
35
+ mask = (mask > 127).astype(np.uint8)
36
+ mask = torch.from_numpy(mask).long()
37
+
38
+ unique_vals = np.unique(mask)
39
+ if not set(unique_vals).issubset({0, 1}):
40
+ raise ValueError(f"Mask contains invalid values: {unique_vals}")
41
+
42
+ return image, mask
scripts/Segmentation/train.py ADDED
@@ -0,0 +1,250 @@
1
+ import os
2
+ import torch
3
+ import datetime
4
+ import numpy as np
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import matplotlib.pyplot as plt
8
+ import scripts.config as config
9
+ from torch.utils.data import DataLoader
10
+ from sklearn.metrics import jaccard_score
11
+ from torch.utils.data import random_split
12
+ import torchvision.transforms as transforms
13
+ import scripts.Segmentation.augment as augment
14
+ from scripts.Segmentation.models import ResNetUNet
15
+ from warmup_scheduler import GradualWarmupScheduler
16
+ from scripts.Segmentation.focalLoss import FocalLoss
17
+ from scripts.Segmentation.segDS import SegmentationDataset
18
+ from scripts.Segmentation.diceLossCriterion import DiceLoss
19
+
20
+ def save_report(row):
21
+ print(str(row))
22
+ with open(config.report_file, 'a', encoding='utf-8') as f:
23
+ f.write(str(row) + '\n')
24
+
25
+ def compute_class_weights(dataset):
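+ # inverse-frequency class weights (background vs. foreground) over the whole dataset, normalized to sum to 1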
26
+ class_counts = torch.zeros(2)
27
+ for _, mask in dataset:
28
+ pixels = mask.view(-1)
29
+ for c in [0, 1]:
30
+ class_counts[c] += (pixels == c).sum()
31
+ weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
32
+ weights = weights / weights.sum()
33
+ return weights
34
+
35
+ os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
36
+ try:
37
+ save_report(torch.__version__)
38
+ save_report(torch.version.cuda)
39
+ save_report(torch.cuda.get_arch_list()) #['sm_75', 'sm_86']
40
+ except Exception as e:
41
+ save_report(e)
42
+ pass
43
+
44
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
45
+ print('Starting processing...')
46
+
47
+ os.makedirs(config.checkpoints, exist_ok=True)
48
+
49
+ transform = transforms.Compose([
50
+ transforms.Resize((config.height, config.width)),
51
+ transforms.ToTensor(),
52
+ transforms.Normalize(mean=[0.5], std=[0.5])
53
+ ])
54
+
55
+ dataset = SegmentationDataset(transform = transform)
56
+
57
+ train_size = int(0.8 * len(dataset))
58
+ val_size = len(dataset) - train_size
59
+ train_ds, val_ds = random_split(dataset, [train_size, val_size])
60
+
61
+ train_loader = DataLoader(train_ds, batch_size=4, shuffle=True)
62
+ val_loader = DataLoader(val_ds, batch_size=4, shuffle=False)
63
+
64
+ model = ResNetUNet(num_classes=2).to(config.device)
65
+
66
+ class_weights = compute_class_weights(dataset).to(config.device)
67
+
68
+ criterion = None
69
+ if config.USE_FOCAL_LOSS:
70
+ criterion = FocalLoss(gamma=2.0)
71
+ else:
72
+ criterion = nn.CrossEntropyLoss(weight = class_weights)
73
+
74
+ #optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
75
+ optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
76
+
77
+ #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
78
+ #scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.num_epochs)
79
+ cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.num_epochs)
80
+ scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0, total_epoch=5, after_scheduler=cosine_scheduler)
81
+
82
+ dataHoraInicial = datetime.datetime.now()
83
+ save_report('\n\n\nStarting training on: ' + str(dataHoraInicial))
84
+
85
+ accuracies = []
86
+ iou_history = []
87
+ loss_history = []
88
+ dice_history = []
89
+ val_accuracies = []
90
+ best_accuracy = 0.0
91
+ dice_loss = DiceLoss()
92
+ epochs_no_improve = 0
93
+
94
+ model.train()
95
+ for epoch in range(config.num_epochs):
96
+ model.train()  # re-enable training mode (validation below switches the model to eval)
+ total_loss = 0
97
+ correct_pixels = 0
98
+ total_pixels = 0
99
+
100
+ for images, masks in train_loader:
101
+ images = images.to(config.device).float()
102
+ masks = masks.to(config.device).long()
103
+
104
+ output = []
105
+
106
+ if config.USE_TTA:
107
+ for img in images:
108
+ preds = augment.predict_with_tta(model, img.unsqueeze(0)) # [1, C, H, W]
109
+ output.append(preds)
110
+ output = torch.cat(output, dim=0) # [B, C, H, W]
111
+ else:
112
+ output = model(images)
113
+
114
+ output = output.float()
115
+ masks = masks.long()
116
+
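+ # composite objective: Dice (weight 1.5) drives region overlap, CrossEntropy/Focal (weight 0.5) stabilizes per-pixel learning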
117
+ loss = 1.5 * dice_loss(output, masks) + 0.5 * criterion(output, masks)
118
+
119
+ optimizer.zero_grad()
120
+ loss.backward()
121
+ optimizer.step()
122
+ scheduler.step()
123
+
124
+ preds = torch.argmax(output, dim=1)
125
+
126
+ if config.USE_REFINEMENT:
127
+ preds = augment.refine_mask(preds)
128
+
129
+ correct_pixels += (preds == masks).sum().item()
130
+ total_pixels += torch.numel(preds)
131
+ total_loss += loss.item()
132
+
133
+ loss_history.append(total_loss)
134
+ epoch_accuracy = correct_pixels / total_pixels
135
+ accuracies.append(epoch_accuracy)
136
+
137
+ model.eval()
138
+ val_correct = 0
139
+ val_total = 0
140
+ val_preds_all = []
141
+ val_targets_all = []
142
+ for val_images, val_masks in val_loader:
143
+ val_images = val_images.to(config.device).float()
144
+ val_masks = val_masks.to(config.device).long()
145
+
146
+ val_outputs = model(val_images)
147
+ val_preds = torch.argmax(val_outputs, dim=1)
148
+
149
+ val_preds_all.append(val_preds.view(-1).cpu().numpy())
150
+ val_targets_all.append(val_masks.view(-1).cpu().numpy())
151
+
152
+ val_correct += (val_preds == val_masks).sum().item()
153
+ val_total += torch.numel(val_preds)
154
+
155
+ val_accuracy = val_correct / val_total
156
+ val_accuracies.append(val_accuracy)
157
+
158
+ val_preds_flat = np.concatenate(val_preds_all)
159
+ val_targets_flat = np.concatenate(val_targets_all)
160
+
161
+ #iou = jaccard_score(val_targets_flat, val_preds_flat, average='binary')
162
+ iou = jaccard_score(val_targets_flat, val_preds_flat, average='macro') # or 'weighted'
163
+
164
+ intersection = np.logical_and(val_preds_flat, val_targets_flat).sum()
165
+ union = np.logical_or(val_preds_flat, val_targets_flat).sum()
166
+ dice = (2 * intersection) / (val_preds_flat.sum() + val_targets_flat.sum() + 1e-6)
167
+ iou_history.append(iou)
168
+ dice_history.append(dice)
169
+
170
+ save_report(f"Epoch {epoch+1}/{config.num_epochs}, Loss: {total_loss:.4f}, "
171
+ f"Train Acc: {epoch_accuracy:.4f}, Val Acc: {val_accuracy:.4f}, "
172
+ f"IoU: {iou:.4f}, Dice: {dice:.4f}")
173
+
174
+
175
+ if (epoch + 1) % config.checkpoint_interval == 0:
176
+ checkpoint_path = os.path.join(config.checkpoints, f"checkpoint_epoch_{epoch+1}.pt")
177
+ torch.save({
178
+ 'epoch': epoch + 1,
179
+ 'model_state_dict': model.state_dict(),
180
+ 'optimizer_state_dict': optimizer.state_dict(),
181
+ 'loss': total_loss,
182
+ 'accuracy': epoch_accuracy,
183
+ }, checkpoint_path)
184
+
185
+ if epoch_accuracy > best_accuracy:
186
+ save_report(f"🔸 New best model at epoch {epoch+1} (acc: {epoch_accuracy:.4f}) — saving best_model.pt")
187
+ best_accuracy = epoch_accuracy
188
+ torch.save(model.state_dict(), os.path.join(config.checkpoints, "best_model.pt"))
189
+ epochs_no_improve = 0
190
+ else:
191
+ epochs_no_improve += 1
192
+
193
+ if epochs_no_improve >= config.early_stop_patience:
194
+ save_report(f"\n⛔ Early stopping triggered at epoch {epoch+1}")
195
+ break
196
+
197
+ dataHoraFinal = datetime.datetime.now()
198
+ save_report('Completing training on: ' + str(dataHoraFinal))
199
+ save_report('Total training execution time = ' + str((dataHoraFinal - dataHoraInicial)))
200
+
201
+ model.eval()
202
+
203
+ torch.save(model, config.modelName)
204
+
205
+ try:
206
+ plt.figure(figsize=(8, 5))
207
+ plt.plot(range(1, len(loss_history)+1), loss_history, marker='o')
208
+ plt.title("Loss Evolution")
209
+ plt.xlabel("Epochs")
210
+ plt.ylabel("Loss")
211
+ plt.grid()
212
+ plt.tight_layout()
213
+ plt.savefig(config.source + 'training_loss.png')
214
+
215
+ plt.figure(figsize=(8, 5))
216
+ plt.plot(range(1, len(val_accuracies)+1), val_accuracies, marker='o', color='green')
217
+ plt.title("Validation Accuracy")
218
+ plt.xlabel("Epochs")
219
+ plt.ylabel("Pixel Accuracy")
220
+ plt.grid()
221
+ plt.tight_layout()
222
+ plt.savefig(config.source + 'training_val_accuracy.png')
223
+
224
+ plt.figure(figsize=(8, 5))
225
+ plt.plot(range(1, len(iou_history)+1), iou_history, marker='o', color='purple')
226
+ plt.title("IoU Evolution")
227
+ plt.xlabel("Epochs")
228
+ plt.ylabel("IoU Score")
229
+ plt.grid()
230
+ plt.tight_layout()
231
+ plt.savefig(config.source + 'iou_history.png')
232
+
233
+ plt.figure(figsize=(8, 5))
234
+ plt.plot(range(1, len(dice_history)+1), dice_history, marker='o', color='orange')
235
+ plt.title("Dice Score Evolution")
236
+ plt.xlabel("Epochs")
237
+ plt.ylabel("Dice Score")
238
+ plt.grid()
239
+ plt.tight_layout()
240
+ plt.savefig(config.source + 'dice_history.png')
241
+
242
+ except Exception as e:
243
+ pass
244
+
245
+ save_report("\nTraining Summary:")
246
+ save_report(f" Min Loss: {min(loss_history):.4f}")
247
+ save_report(f" Max Loss: {max(loss_history):.4f}")
248
+ save_report(f" Loss final: {loss_history[-1]:.4f}")
249
+ save_report(f" Best Val Acc: {max(val_accuracies):.4f}")
250
+ print("\nCompleted ✅")
scripts/__init__.py ADDED
File without changes
scripts/config.py ADDED
@@ -0,0 +1,25 @@
1
+ import os
2
+
3
+ device = ''
4
+ width = 512
5
+ height = 512
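+ # all images and masks are resized to (width, height) before training and inference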
6
+
7
+ num_epochs = 100
8
+ checkpoint_interval = 2
9
+ early_stop_patience = 10
10
+
11
+ USE_TTA = True
12
+ USE_REFINEMENT = True
13
+ USE_FOCAL_LOSS = False
14
+
15
+ source = str(os.path.dirname(os.path.realpath(__file__))).replace('scripts', '')
16
+ images = source + 'DataSet/images/'
17
+ masks = source + 'DataSet/masks/'
18
+ annotations = source + 'DataSet/annotations/'
19
+ extraTests = source + 'DataSet/ExtraTests/'
20
+ tempImages = source + 'DataSet/tempImages/'
21
+ checkpoints = source + 'checkpoints/'
22
+
23
+ modelName = checkpoints + 'modelo_completo.pth'
24
+ report_file = source + 'report_file.txt'
25
+