AndreCosta committed on
Commit 7b615ae · 1 Parent(s): b3932dd

Initial clean commit with LFS configured

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete set of changes.
Files changed (50)
  1. .gitattributes +3 -0
  2. .huggingface/model-index.yaml +32 -0
  3. .project +17 -0
  4. .pydevproject +11 -0
  5. .settings/org.eclipse.core.resources.prefs +6 -0
  6. CHANGELOG.md +481 -0
  7. DataSet/ExtraTests/29bb3ece3180_11.jpg +3 -0
  8. DataSet/annotations/classes.txt +2 -0
  9. DataSet/images/0cdf5b5d0ce1_01.jpg +3 -0
  10. DataSet/masks/0cdf5b5d0ce1_01_mask.png +3 -0
  11. LICENSE +20 -0
  12. README.md +345 -3
  13. __init__.py +0 -0
  14. bad_image.png +3 -0
  15. checkpoints/best_model.pt +3 -0
  16. dice_history.png +3 -0
  17. good_image.png +3 -0
  18. iou_history.png +3 -0
  19. model_card.md +122 -0
  20. report_file.txt +56 -0
  21. requirements.txt +26 -0
  22. run_app.py +9 -0
  23. run_evaluate.py +9 -0
  24. scripts/Dataset/ConvertFormat.py +32 -0
  25. scripts/Dataset/Rename.py +22 -0
  26. scripts/Dataset/Resize.py +25 -0
  27. scripts/Dataset/TrainVal.py +35 -0
  28. scripts/Dataset/__init__.py +0 -0
  29. scripts/Dataset/dataAugmentation.py +48 -0
  30. scripts/Dataset/deleteDuplicates.py +32 -0
  31. scripts/Dataset/getDS_HuggingFace.py +17 -0
  32. scripts/Dataset/getImages.py +17 -0
  33. scripts/Dataset/grays.py +23 -0
  34. scripts/Dataset/mask_diagnosis.py +8 -0
  35. scripts/Dataset/masks.py +47 -0
  36. scripts/Dataset/validMasks.py +45 -0
  37. scripts/Segmentation/Future/__init__.py +0 -0
  38. scripts/Segmentation/Future/cyber_train.py +299 -0
  39. scripts/Segmentation/Future/train_embedded_explicit_model.py +126 -0
  40. scripts/Segmentation/__init__.py +0 -0
  41. scripts/Segmentation/app.py +59 -0
  42. scripts/Segmentation/augment.py +30 -0
  43. scripts/Segmentation/diceLossCriterion.py +22 -0
  44. scripts/Segmentation/evaluate_model.py +93 -0
  45. scripts/Segmentation/focalLoss.py +15 -0
  46. scripts/Segmentation/models.py +78 -0
  47. scripts/Segmentation/segDS.py +42 -0
  48. scripts/Segmentation/train.py +250 -0
  49. scripts/__init__.py +0 -0
  50. scripts/config.py +25 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.jpg filter=lfs diff=lfs merge=lfs -text
38
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
.huggingface/model-index.yaml ADDED
@@ -0,0 +1,32 @@
1
+ # .huggingface/model-index.yaml
2
+ model-index:
3
+ - name: ResNet-UNet Segmenter
4
+ results:
5
+ - task:
6
+ type: image-segmentation
7
+ name: Image Segmentation
8
+ dataset:
9
+ name: Carvana Subset (Indoor Controlled)
10
+ type: image-segmentation
11
+ metrics:
12
+ - type: iou
13
+ value: 0.994
14
+ - type: dice
15
+ value: 0.996
16
+ metadata:
17
+ library_name: pytorch
18
+ tags:
19
+ - image-segmentation
20
+ - unet
21
+ - resnet
22
+ - computer-vision
23
+ - binary-segmentation
24
+ - grayscale
25
+ license: mit
26
+ framework: pytorch
27
+ task:
28
+ type: image-segmentation
29
+ name: Image Segmentation
30
+ datasets:
31
+ - name: CV Image Segmentation (Carvana subset)
32
+ type: image-segmentation
.project ADDED
@@ -0,0 +1,17 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <projectDescription>
3
+ <name>Hooging_Face_CV_certification</name>
4
+ <comment></comment>
5
+ <projects>
6
+ </projects>
7
+ <buildSpec>
8
+ <buildCommand>
9
+ <name>org.python.pydev.PyDevBuilder</name>
10
+ <arguments>
11
+ </arguments>
12
+ </buildCommand>
13
+ </buildSpec>
14
+ <natures>
15
+ <nature>org.python.pydev.pythonNature</nature>
16
+ </natures>
17
+ </projectDescription>
.pydevproject ADDED
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <?eclipse-pydev version="1.0"?><pydev_project>
3
+
4
+ <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
5
+
6
+ <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python interpreter</pydev_property>
7
+
8
+ <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
9
+ <path>/${PROJECT_DIR_NAME}</path>
10
+ </pydev_pathproperty>
11
+ </pydev_project>
.settings/org.eclipse.core.resources.prefs ADDED
@@ -0,0 +1,6 @@
1
+ eclipse.preferences.version=1
2
+ encoding//scripts/Dataset/ConvertFormat.py=utf8
3
+ encoding//scripts/Dataset/Rename.py=utf8
4
+ encoding//scripts/Dataset/Resize.py=utf8
5
+ encoding//scripts/Dataset/TrainVal.py=utf8
6
+ encoding//scripts/Dataset/grays.py=utf8
CHANGELOG.md ADDED
@@ -0,0 +1,481 @@
1
+ # 📌 Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+ This file follows the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format and semantic versioning.
5
+
6
+ ## [v0.15.0]
7
+ - 2025-07-19 - Version Analysis
8
+
9
+ Adjustments made to the model evaluation scripts ("evaluate_model.py" and "app.py") so they work with the model's new architecture.
10
+
11
+ 🟠 [Analysis] Overfitting detected in the latest training sessions
12
+ During the assessment of saved checkpoints (especially at later epochs), progressive signs of overfitting were observed. The model began to mark regions outside the object of interest, including unwanted noise and leftover blobs in the segmentation - behavior not observed during the initial training epochs.
13
+
14
+ This effect was progressive: the greater the number of epochs, the more the model "hallucinates" regions, extrapolating beyond the real boundaries of the expected mask.
15
+
16
+ 📌 Implications:
17
+
18
+ Clear signal that the model is memorizing training-set patterns, including irrelevant visual artifacts.
19
+
20
+ It can compromise the model's generalization to real environments or unseen images.
21
+
22
+
23
+ - 2025-07-18
24
+ ✅ Adjustments to the loss and TTA pipeline
25
+ Fixed the DiceLoss function
26
+
27
+ Added shape checking and compatibility between preds and targets to avoid broadcast errors.
28
+
29
+ Included safe conversion to float after binary comparison for class 1 masks.
30
+
31
+ Ensures that targets are correctly reduced if they have dimensions [B, 1, H, W].
32
+
33
+ Modified predict_with_tta to return logits
34
+
35
+ Created the predict_with_tta_logits function, which returns the mean of the logits before argmax, allowing direct use with loss functions such as CrossEntropyLoss and DiceLoss.
36
+
37
+ Maintained compatibility with the TTA structure, but now without compromising the backward compatibility of the training pipeline.
38
+
39
+ Standardization of outputs for use in loss analysis
40
+
41
+ Training adapted to always deliver logit tensors [B, C, H, W] for the loss criterion, regardless of the use of TTA.
42
+
43
+
44
+
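+ A minimal sketch of the two adjustments above (a shape-safe Dice term and logit-averaging TTA), assuming two-class logits of shape [B, C, H, W] and integer masks; the project's actual implementations live in diceLossCriterion.py and augment.py:
+
+ ```python
+ import torch
+
+ def dice_loss(logits, targets, smooth=1e-6):
+     # logits: [B, C, H, W]; targets: [B, H, W] or [B, 1, H, W] with values {0, 1}
+     if targets.dim() == 4 and targets.size(1) == 1:
+         targets = targets.squeeze(1)                 # reduce [B, 1, H, W] -> [B, H, W]
+     probs = torch.softmax(logits, dim=1)[:, 1]       # probability of class 1
+     targets = (targets == 1).float()                 # safe float conversion after binary comparison
+     inter = (probs * targets).sum(dim=(1, 2))
+     union = probs.sum(dim=(1, 2)) + targets.sum(dim=(1, 2))
+     return 1.0 - ((2.0 * inter + smooth) / (union + smooth)).mean()
+
+ def predict_with_tta_logits(model, images):
+     # Average logits (before any argmax) over a horizontal flip, so the result
+     # can still be fed to CrossEntropyLoss / DiceLoss.
+     logits = model(images)
+     flipped = torch.flip(model(torch.flip(images, dims=[-1])), dims=[-1])
+     return (logits + flipped) / 2.0
+ ```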
45
+ ## [v0.14.0] – 2025-07-17 – Conditional Control via Settings
46
+ Improvements and Additions:
47
+
48
+ 🔧 Added configurable flags at the beginning of the script (USE_TTA, USE_REFINEMENT, USE_FOCAL_LOSS, GAMMA_FOCAL) to enable/disable advanced behaviors in the pipeline in a simple and controlled manner.
49
+
50
+ ✅ Automatic loss function selection based on the flag:
51
+
52
+ CrossEntropyLoss by default.
53
+
54
+ FocalLoss with adjustable gamma (via GAMMA_FOCAL), enabled by flag.
55
+
56
+ 🔁 Test-Time Augmentation (TTA) now optionally applicable in the inference phase:
57
+
58
+ Includes flips and rotations with automatic inversion.
59
+
60
+ Final prediction by averaging probability maps.
61
+
62
+ 🧼 Mask smoothing via morphological closing/opening with cv2, also controllable by flag.
63
+
64
+ Applies morphological refinement to smooth contours and reduce jagged edges.
65
+
66
+ Motivation:
67
+ Allows modular experimentation, with a direct impact on validation metrics (IoU/Dice) without altering the core model or recoding sections. Flexibility is essential for controlled experimentation in the R&D cycle.
68
+
69
+
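+ A sketch of how such flags can drive the pipeline; the flag names come from this entry, the FocalLoss class is the one defined in focalLoss.py (injected here only to keep the sketch self-contained), and the kernel size is an illustrative assumption:
+
+ ```python
+ import cv2
+ import numpy as np
+ import torch.nn as nn
+
+ USE_FOCAL_LOSS = False   # flag values are illustrative
+ GAMMA_FOCAL = 2.0
+ USE_REFINEMENT = True
+
+ def build_criterion(focal_loss_cls=None):
+     # Select the loss function from the flag; focal_loss_cls is the project's FocalLoss.
+     if USE_FOCAL_LOSS and focal_loss_cls is not None:
+         return focal_loss_cls(gamma=GAMMA_FOCAL)
+     return nn.CrossEntropyLoss()
+
+ def refine_mask(mask, kernel_size=5):
+     # Morphological closing then opening to smooth contours and remove small specks.
+     if not USE_REFINEMENT:
+         return mask
+     kernel = np.ones((kernel_size, kernel_size), np.uint8)
+     closed = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
+     return cv2.morphologyEx(closed, cv2.MORPH_OPEN, kernel)
+ ```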
70
+ ## [v0.13.0] – 2025-07-16
71
+ ⚙️ **Practical Training Adjustments**
72
+ - **Removed the use of `GradualWarmupScheduler`** due to an import error and dependency conflicts — will be re-evaluated in the future.
73
+ - **Kept the new `CosineAnnealingLR`** scheduler, promoting a smooth variation in the learning rate across epochs.
74
+
75
+ 🧪 **Experimental Configuration**
76
+ - **Adjusted `early_stop_patience` to 40**, allowing the model greater exposure to the data before stopping training due to stagnation.
77
+
78
+ 💡 Notes
79
+ - Testing with 20,000 512×512 images caused GPU memory overhead — revised strategy for smaller batches and progressive adjustments.
80
+
81
+
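+ A minimal sketch of the scheduler setup kept in this version (the optimizer settings follow the v0.8.0 entry further down; the placeholder model stands in for the ResNet-UNet defined in models.py):
+
+ ```python
+ import torch
+ from torch import nn
+ from torch.optim.lr_scheduler import CosineAnnealingLR
+
+ model = nn.Conv2d(1, 2, kernel_size=3, padding=1)   # placeholder for the real model
+ num_epochs = 100                                     # illustrative value
+ optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
+ scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=1e-6)
+
+ for epoch in range(num_epochs):
+     # ... one epoch of training and validation would run here ...
+     scheduler.step()   # smooth cosine decay of the learning rate across epochs
+ ```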
82
+ ## [v0.12.0] – 2025-07-14
83
+
84
+ 🚀 New Phase: Consistent Dataset and High-Resolution Input
85
+
86
+ 🧼 Dataset Reconstruction
87
+ Dataset completely recreated from scratch, correcting critical flaws in image-to-mask matching.
88
+
89
+ Fixed a bug in the preprocessing script that could mix or swap masks between images.
90
+
91
+ Images and masks are now guaranteed to be aligned, with structural consistency and no label pollution.
92
+
93
+ 📏 Resolution Increase
94
+ Input resolution increased from 256×256 → 512×512, allowing for better definition of shapes and contours.
95
+
96
+ Architecture adjusted to support the new dimensions while maintaining U-Net flow with skip connections.
97
+
98
+ 🧠 Quality and Focus
99
+ Higher information density per image, promoting more refined learning.
100
+
101
+ Expected reduction of "coarse blocks" in predictions, with improvements in edges and spatial orientation.
102
+
103
+ 💡 Notes
104
+ This new stage marks the transition from the exploratory phase to a more mature, validated pipeline aligned with best practices for deep segmentation.
105
+
106
+ Future validations will include overlay visualizations, class-based metrics, and qualitative comparisons between versions.
107
+
108
+
109
+ 🎯 Segmentation and robustness refinements
110
+
111
+ 🧼 Correction of masks with invalid values
112
+ Detected critical error: some masks contained the value 2, causing failures in the metrics (ValueError: unseen labels).
113
+
114
+ Applied solution:
115
+
116
+ Converted masks to NumPy and binarized them with mask = (mask > 127).astype(np.uint8), ensuring only the values 0 and 1.
117
+
118
+ Subsequent conversion with torch.from_numpy(...).long() for use in the model.
119
+
120
+ Integrity validation with np.unique(), raising an explicit error if unexpected values are found.
121
+
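+ A minimal sketch of the binarization and integrity check described above (the project's own loading logic lives in segDS.py):
+
+ ```python
+ import numpy as np
+ import torch
+ from PIL import Image
+
+ def load_binary_mask(path):
+     mask = np.array(Image.open(path).convert("L"))
+     mask = (mask > 127).astype(np.uint8)          # binarize: only 0 and 1 remain
+     values = np.unique(mask)
+     if not set(values.tolist()) <= {0, 1}:        # explicit integrity check
+         raise ValueError(f"Unexpected mask values {values} in {path}")
+     return torch.from_numpy(mask).long()          # class indices for CrossEntropyLoss
+ ```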
122
+ 🧪 Pipeline diagnosis
123
+ Added temporary inspection of the masks' unique values during __getitem__() to identify out-of-spec data.
124
+
125
+ 📉 Correction of metrics
126
+ Adjusted the use of scikit-learn's jaccard_score for multiclass scenarios:
127
+
128
+ Replaced average='binary' with average='macro' to avoid the error ValueError: Target is multiclass ....
129
+
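+ A short sketch of the per-image metrics described above, assuming binary {0, 1} masks (the function name is illustrative):
+
+ ```python
+ import numpy as np
+ from sklearn.metrics import jaccard_score
+
+ def iou_and_dice(preds, targets):
+     preds = preds.reshape(-1).astype(np.uint8)
+     targets = targets.reshape(-1).astype(np.uint8)
+     iou = jaccard_score(targets, preds, average='macro')   # avoids the multiclass ValueError
+     inter = np.logical_and(preds, targets).sum()
+     denom = preds.sum() + targets.sum()
+     dice = 2.0 * inter / denom if denom > 0 else 1.0
+     return iou, dice
+ ```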
130
+ 🗃️ Dataset and loader
131
+ Transformations kept with .convert("L") to ensure grayscale images and masks, despite the subsequent binarization.
132
+
133
+ Confirmation of data alignment between image and mask by direct checking in the loader.
134
+
135
+ 🛠️ DataSet Stability
136
+ Implemented an explicit check that each mask exists on disk, avoiding silent failures.
137
+
138
+ Pipeline preparation to detect problems early during data loading.
139
+
140
+ 💡 Observations
141
+ Despite the refinements, the model still produces coarse segmentations ("blocks"), but they already show an orientation consistent with the object (car).
142
+
143
+ Next steps include augmentation adjustments, expansion of the receptive field, and improvement of contour quality.
144
+
145
+
146
+ ## [v0.11.0] – 2025-07-13
147
+ 🔧 Pipeline and Logging Refactoring
148
+ 📄 Created the save_report() method to save training logs to a file continuously and securely (replaces print for traceability in production).
149
+
150
+ 🧠 Moved the num_epochs, checkpoint_interval, and early_stop_patience parameters to config.py to centralize experiment configuration.
151
+
152
+ ✅ Removed the fixed use of print() during training, facilitating use on clusters, remote notebooks, and reproducibility.
153
+
154
+ 🗂️ Updated train.py to run with zero dependency on manual changes: everything is configurable via config.
155
+
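+ A minimal sketch of what such a logging helper can look like (the default file name mirrors report_file.txt in this repository; the real helper and its configuration live in the training code):
+
+ ```python
+ from datetime import datetime
+
+ def save_report(row, report_file="report_file.txt"):
+     # Append one line to the training report so progress survives crashes
+     # and headless or remote runs (replaces ad-hoc print calls).
+     with open(report_file, "a", encoding="utf-8") as f:
+         f.write(f"{row}\n")
+
+ save_report(f"Starting training on: {datetime.now()}")   # usage example
+ ```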
156
+ 🧮 Metric Adjustments
157
+ ✅ Fixed and stabilized IoU and Dice Score calculations after each epoch, with automatic logging in the final report.
158
+
159
+ 🧪 Dice score calculated via np.logical_and and np.logical_or for greater accuracy and consistency with academic metrics.
160
+
161
+ 📊 Automatic saving of metrics graphs:
162
+
163
+ training_loss.png
164
+
165
+ training_val_accuracy.png
166
+
167
+ iou_history.png
168
+
169
+ dice_history.png
170
+
171
+ 🧬 Dataset
172
+ 🔁 Maintained robust preprocessing and mask binarization.
173
+
174
+ 💡 Validation of continuous image-mask alignment externally (via separate script) before training.
175
+
176
+ 🚀 Certification Preparation
177
+ 🎯 Stable modular structure, suitable for submission to platforms such as Hugging Face.
178
+
179
+ 🔒 Centralized logging ensures hardware traceability (CUDA and Torch versions recorded at the start of training).
180
+
181
+
182
+ ## [v0.10.0] – 2025-07-12
183
+ 🧾 Training Pipeline Refinements and Automated Reporting
184
+
185
+ 📋 Modularization and Centralized Configuration
186
+ 🔧 Critical parameters now defined via scripts/config.py:
187
+
188
+ num_epochs = 250
189
+
190
+ checkpoint_interval = 15
191
+
192
+ early_stop_patience = 60
193
+
194
+ Easy adjustments without changing the main training code
195
+
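+ An illustrative excerpt of what a centralized scripts/config.py can hold, using the values listed above (report_file and device are referenced elsewhere in this changelog):
+
+ ```python
+ # scripts/config.py (illustrative excerpt, not the full file)
+ import torch
+
+ num_epochs = 250
+ checkpoint_interval = 15
+ early_stop_patience = 60
+ report_file = "report_file.txt"
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ ```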
196
+ 📝 Execution Log with save_report()
197
+ ✅ Added save_report(row) method to log:
198
+
199
+ Library versions (Torch, CUDA)
200
+
201
+ Progress per epoch (Loss, Accuracy, IoU, Dice)
202
+
203
+ Training Start and End
204
+
205
+ Final Performance Summary
206
+
207
+ 📁 Logs automatically saved in config.report_file, allowing historical tracking and auditing of the execution
208
+
209
+ 💡 Notes
210
+ Code now ready for automated cluster executions, CI/CD, or continuous validation pipelines.
211
+
212
+ Standardization facilitates future integration with TensorBoard, Gradio, or custom dashboards.
213
+
214
+
215
+ ## [v0.9.0] – 2025-07-10
216
+ 🎯 Certification and Standards Compliance
217
+
218
+ 📦 Class Structure and Compliance
219
+ 🔄 Inverted dataset classes to follow the conventional pattern:
220
+
221
+ 0: Background
222
+
223
+ 1: Object
224
+
225
+ Avoids confusion in standard metrics such as CrossEntropy and Jaccard
226
+
227
+ 🧠 ReLU in Skip Connections
228
+ 🚀 Added F.relu(...) activations after up + skip sums in U-Net, improving the ability to learn nonlinearities between blocks
229
+
230
+ Fixes linear behavior of activations in the decoding phase
231
+
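+ A hypothetical decoder step showing the activation placement described above (the real blocks are defined in models.py; the channel handling is simplified here):
+
+ ```python
+ import torch.nn.functional as F
+ from torch import nn
+
+ class UpBlock(nn.Module):
+     def __init__(self, in_ch, out_ch):
+         super().__init__()
+         self.up = nn.ConvTranspose2d(in_ch, out_ch, kernel_size=2, stride=2)
+
+     def forward(self, x, skip):
+         up = self.up(x)
+         return F.relu(up + skip)   # non-linearity applied after the up + skip sum
+ ```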
232
+ ⚖️ Adjusted Class Balance
233
+ ⚙️ compute_class_weights() now uses a more robust normalization formula:
234
+
235
+ weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
236
+ weights = weights / weights.sum()
237
+ Avoids overfitting of the minority class without distorting learning.
238
+
239
+ 📊 Advanced Training Metrics
240
+ ✅ Calculation of IoU (Jaccard) and Dice Score on the validation set per epoch:
241
+
242
+ sklearn's jaccard_score()
243
+
244
+ Dice with intersection / union using NumPy
245
+
246
+ 📉 Stored as iou_history and dice_history, with graphs saved via matplotlib
247
+
248
+ 🖼️ Metrics Visualization
249
+ New graphs:
250
+
251
+ iou_history.png
252
+
253
+ dice_history.png
254
+
255
+ All graphs are saved directly, maintaining compatibility with CUDA/headless training environments.
256
+
257
+ 🧪 Stability and Diagnostics
258
+ Fixed the bug TypeError: Cannot interpret '-1' as a data type, caused by incorrect dtypes when calling np.concatenate on PyTorch tensors converted with .astype('int')
259
+
260
+ Validation data is now guaranteed to be np.uint8 to avoid dtype conflicts.
261
+
262
+ 💡 Notes
263
+ Model now follows the Hugging Face Vision Certification metrics standard.
264
+
265
+ Validated pipeline with clear training/validation separation, reliable metrics, and an extensible multiclass structure.
266
+
267
+
268
+ ## [v0.8.0] – 2025-07-09
269
+ 🧠 Pipeline Architecture and Reconstruction
270
+ ✅ Replaced the binary output model with a multiclass architecture (num_classes=2) with CrossEntropyLoss and softmax, allowing future expansion to multi-class segmentations.
271
+
272
+ 🧪 New composite loss function:
273
+
274
+ Implemented custom DiceLoss with smooth=1e-6 for greater sensitivity to contours and thin areas
275
+
276
+ Combined with class-weighted CrossEntropyLoss: loss = 0.5 * CrossEntropy + 0.5 * DiceLoss
277
+
278
+ 📊 Dynamic calculation of class weights:
279
+
280
+ Added compute_class_weights() method to balance the loss based on the actual pixel frequency per class in the dataset
281
+
282
+ Replaces previous fixed weights, automatically adapting to new datasets
283
+
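+ A sketch of the weighted composite criterion, reusing the weight formula quoted in the v0.9.0 entry above (the pixel counts are illustrative and the Dice term is the custom loss from diceLossCriterion.py):
+
+ ```python
+ import torch
+ import torch.nn as nn
+
+ def compute_class_weights(class_counts):
+     weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
+     return weights / weights.sum()
+
+ class_counts = torch.tensor([9.0e6, 1.0e6])   # illustrative background/object pixel counts
+ ce = nn.CrossEntropyLoss(weight=compute_class_weights(class_counts))
+
+ def combined_loss(logits, targets, dice_loss_fn):
+     # 0.5 * CrossEntropy + 0.5 * Dice, as described above
+     return 0.5 * ce(logits, targets) + 0.5 * dice_loss_fn(logits, targets)
+ ```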
284
+ 🧬 Dataset and Preprocessing
285
+ 🖼️ Updated Dataset SegmentationDataset:
286
+
287
+ Robust loading with mask presence check
288
+
289
+ Image and mask conversion to grayscale
290
+
291
+ Binarized masks with threshold (mask > 127).long() to ensure values {0, 1}
292
+
293
+ 🎨 Transformations:
294
+
295
+ Applied Resize (256×256) and normalized with mean=[0.5], std=[0.5] for single-channel input
296
+
297
+ 🏗️ ResNetUNet Model
298
+ 🔁 Reconstructed architecture based on resnet50 (pretrained=True):
299
+
300
+ Adapted conv1 for single-channel input
301
+
302
+ Skip connections with residual sum between encoder and decoder
303
+
304
+ Final upsample with nn.Upsample(scale_factor=2) to restore original resolution
305
+
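+ A minimal sketch of the single-channel adaptation of the encoder (the complete ResNetUNet, including decoder and skip connections, is defined in models.py):
+
+ ```python
+ from torch import nn
+ from torchvision.models import resnet50
+
+ encoder = resnet50(pretrained=True)
+ # Replace the first convolution so the network accepts 1-channel grayscale input.
+ encoder.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
+ ```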
306
+ 🏋️ Training and Monitoring
307
+ 📈 Training with:
308
+
309
+ AdamW with lr=1e-4 and weight_decay=1e-4
310
+
311
+ StepLR scheduler with gamma=0.5 every 10 epochs
312
+
313
+ Early stopping with patience=60
314
+
315
+ Checkpoints saved every 15 epochs
316
+
317
+ Automatic saving of the best model based on train_accuracy
318
+
319
+ 📊 Metrics:
320
+
321
+ Pixel accuracy for training and validation
322
+
323
+ History of loss and accuracy by epoch
324
+
325
+ Graphs saved as .png with plt.savefig() (without plt.show())
326
+
327
+ 💡 Notes
328
+ Model now prepared for multiclass segmentations with greater stability
329
+
330
+ More robust and modular pipeline, with a clear separation between architecture, dataset, loss, and training
331
+
332
+ Structure ready for integration with metrics such as IoU, F1-score, and visualization with TensorBoard
333
+
334
+
335
+ ## [v0.7.0] – 2025-07-07
336
+ 🧪 Advanced Binary Segmentation
337
+ ✅ Modified architecture: ResNetUNet model adjusted for single-channel output (num_classes=1), with sigmoid applied in the final step — prepared for smooth binary segmentation.
338
+
339
+ 🧠 Masks reformatted in the dataset: converted to float32 with shape [1, H, W] and binarized via threshold, optimizing compatibility with BCEWithLogitsLoss.
340
+
341
+ 🎯 New Composite Loss Function
342
+ ➕ Implemented custom Dice Loss to improve learning of contours and thin areas, combined with BCEWithLogitsLoss in equal weight.
343
+
344
+ 🧬 Formula applied: 0.5 * BCE + 0.5 * Dice, increasing the model's sensitivity to the real geometry of the segmented objects.
345
+
346
+ 🧮 Improved Pixel-Wise Evaluation
347
+ 📏 Pixel accuracy adjusted to consider sigmoid and binary threshold (0.5) in predictions before comparing with masks — makes the calculation more faithful to the purpose of segmentation.
348
+
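+ A minimal sketch of the adjusted pixel accuracy, assuming single-channel logits and masks already binarized to {0, 1}:
+
+ ```python
+ import torch
+
+ def pixel_accuracy(logits, masks, threshold=0.5):
+     # logits, masks: [B, 1, H, W]
+     preds = (torch.sigmoid(logits) > threshold).float()
+     return (preds == masks).float().mean().item()
+ ```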
349
+ 💡 Observations
350
+ Model now captures smoother contours, reducing "square" behavior.
351
+
352
+ Code now ready for integration with advanced metrics such as IoU, Precision/Recall per class, and image visualization with matplotlib or TensorBoard.
353
+
354
+
355
+ ## [v0.6.0] – 2025-07-07
356
+ 🧠 Training Pipeline Refinement
357
+ 🔁 Training now split into training and validation sets: automatic 80/20 split of the SegmentationDataset to monitor generalization.
358
+
359
+ 📊 Validation implemented per epoch with accuracy calculation on the validation set; metric used for early stopping and best_model.pt selection.
360
+
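+ A short sketch of the 80/20 split (the placeholder tensors stand in for the real SegmentationDataset from segDS.py):
+
+ ```python
+ import torch
+ from torch.utils.data import TensorDataset, random_split
+
+ dataset = TensorDataset(torch.zeros(10, 1, 64, 64), torch.zeros(10, 64, 64, dtype=torch.long))
+ n_train = int(0.8 * len(dataset))
+ train_set, val_set = random_split(dataset, [n_train, len(dataset) - n_train])
+ ```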
361
+ 📈 Generalization and Robustness
362
+ 🌈 Added augmentation transformations via RandomHorizontalFlip and RandomRotation on the training set, to make the model more resistant to visual variations.
363
+
364
+ ⏳ Early stopping increased: early_stop_patience increased from 20 to 60 epochs, giving more room for progressive learning.
365
+
366
+ 🔁 Hyperparameters and Regularization
367
+ 📉 Added weight_decay=1e-5 in the Adam optimizer for lightweight L2 regularization.
368
+
369
+ 🎯 Best model metric changed: now best_model.pt saves based on the best validation accuracy, not just training.
370
+
371
+ 📊 Results visualization
372
+ 🖼️ New graph generated training_val_accuracy.png showing the evolution of validation accuracy over epochs.
373
+
374
+ 📊 Graphs saved with plt.savefig() after try/except, avoiding failures in environments with graphics rendering issues via CUDA.
375
+
376
+ 💡 Observations
377
+ Model showed qualitative improvement in segmentation with smoother and more responsive contours — previous squares started to follow the car's rotation, indicating spatial learning.
378
+
379
+ Structure ready for future calculation of IoU per class and integration with TensorBoard, if necessary.
380
+
381
+
382
+ ## [v0.5.0] – 2025-07-05
383
+ ### ⚒️ Critical Data Alignment Fixes
384
+ Fixed mask file mismatch: Masks were stored as .png, but dataset loader expected .jpg extension — caused incorrect or failed loading
385
+
386
+ 🧠 Applied patch via .replace('.jpg', '.png') in the dataset loader to ensure proper image-mask pairing
387
+
388
+ Added FileNotFoundError checks during __getitem__ to avoid silent failures and improve debugging clarity
389
+
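+ A sketch of the extension patch and the existence check (the function name and directory layout are illustrative; the project's loader handles the exact mask naming):
+
+ ```python
+ import os
+
+ def mask_path_for(image_path, masks_dir):
+     name = os.path.basename(image_path).replace('.jpg', '.png')   # masks are stored as .png
+     path = os.path.join(masks_dir, name)
+     if not os.path.exists(path):
+         raise FileNotFoundError(f"Mask not found for {image_path}: {path}")
+     return path
+ ```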
390
+ ### 🧠 Dataset Refinements
391
+ Ensured matching Resize(256×256) transformations for both image and mask, using transforms.functional for consistency
392
+
393
+ Binarization of masks confirmed to produce only {0,1} values, avoiding grayscale range leakage
394
+
395
+ Validated with np.unique() on mask tensors — clean value range critical for CrossEntropyLoss
396
+
397
+
398
+ ---
399
+
400
+ ## [v0.4.0] – 2025-07-04
401
+
402
+ ### ✅ Major Improvements
403
+
404
+ - Added **checkpoint saving** every `N` epochs during training, configurable via a new `checkpoint_interval` parameter.
405
+ - Implemented **Early Stopping** based on pixel accuracy, with a configurable patience (`early_stop_patience`) to avoid overfitting.
406
+ - Final model now saved **twice**:
407
+ - `best_model.pt`: Only weights, for inference/embedded use
408
+ - Full model (`torch.save(model, ...)`) at the end, for future reloading
409
+
410
+ ### ⚠️ Critical Bug Diagnosed and Resolved
411
+
412
+ - **Symptom:** Model was training with no improvement; accuracy stuck; no learning observed
413
+ - **Diagnosis:** Masks loaded from `.jpg` were using full grayscale range `[0, 1, ..., 255]` instead of binary values `[0, 1]`
414
+ - **Fix:** Added diagnostic checks using `np.unique` to validate mask classes; incorporated a preprocessing step to binarize masks
415
+
416
+ ### 🧪 Experimental Enhancements
417
+
418
+ - Updated model evaluation interface (`evaluate_model.py`) for batch testing via folder traversal
419
+ - Separated Gradio demo (`app.py`) for certification usability evaluation
420
+ - Integrated plotting of loss and accuracy with graceful error handling (wrapped in `try/except`)
421
+
422
+ ### 🧠 Observations
423
+
424
+ - Problem with **matplotlib crashing** due to CUDA context when using `plt.show()` after training; workaround applied with `plt.savefig()` only
425
+ - CUDA kernel mismatch on certain environments using **dual RTX 4060** detected as 3060 — resolved by adjusting `torch` + `nvidia-driver` stack (manual)
426
+ - Added check to confirm training device (`config.device`) and `torch.cuda.get_arch_list()` for future reproducibility
427
+
428
+ ### 🤝 Acknowledgements
429
+
430
+ - Much of the model debugging was assisted by real-time reasoning and exploration with **ChatGPT**, especially around mask encoding and loss mismatch.
431
+ - Initial development relied on **GitHub Copilot**, with ChatGPT joining later to refactor, modularize, and refine robustness for submission.
432
+
433
+ ---
434
+
435
+ ## [v0.3.0] – 2025-06-22
436
+
437
+ 🐛 Bug Fixes
438
+ - Fixed `RuntimeError: only batches of spatial targets supported (3D tensors)` caused by mask dimensions
439
+ - Applied `.squeeze(1)` to target tensors before passing to `CrossEntropyLoss`, ensuring correct shape `(B, H, W)`
440
+ - Root cause: mask loaded with shape `(B, 1, H, W)` instead of `(B, H, W)`
441
+
442
+ 👁️ Observations
443
+ - Issue identified during initial model training with grayscale images and ResNet-based U-Net
444
+ - Fix reduces debugging time from hours to seconds — thanks to a productive collaboration with Microsoft Copilot 🧠
445
+
446
+ ---
447
+
448
+ ## [v0.2.0] – 2025-06-22
449
+
450
+ ### 🔧 Project Restructure
451
+ - Fully reorganized project files to reflect a modular and scalable architecture
452
+ - Added new root folders:
453
+ - `DataSet/Cow_Segmentation_Dataset/` to centralize all data and annotations
454
+ - `scripts/Dataset/` for preprocessing and data preparation logic
455
+ - `scripts/Segmentation/` for training, evaluation, and model utilities
456
+ - `scripts/Segmentation/Future/` to house experimental/embedded extensions
457
+
458
+ ### 📑 Documentation Updates
459
+ - Updated `README.md` to match new folder organization and include a **Future Work** section
460
+ - Updated `model_card.md` to reflect modular design and embedded plans
461
+
462
+ ### 💡 Future-Ready Additions
463
+ - Introduced experimental script `train_embedded_explicit_model.py` for ONNX export and embedded deployment (not yet validated)
464
+
465
+ ---
466
+
467
+ ## [v0.1.0] – Initial Release
468
+
469
+ ### 🚀 Baseline Functionality
470
+ - Preprocessing scripts for grayscale mask generation and dataset formatting
471
+ - Training and evaluation scripts for custom segmentation model
472
+ - Initial model card and license
473
+
474
+ ---
475
+
476
+ ## [v0.0.1] – Project Start
477
+
478
+ - Initial discussion on using ResNet as an encoder in U-Net
479
+ - Creation of an example synthetic dataset
480
+ - Structuring of the basic inference script
481
+ - Validation of the visual pipeline and preprocessing strategy
DataSet/ExtraTests/29bb3ece3180_11.jpg ADDED

Git LFS Details

  • SHA256: 23e473f7d4aac81292ae9720a42ba75323201a5557b668b22ad824b2bcee0a3a
  • Pointer size: 131 Bytes
  • Size of remote file: 110 kB
DataSet/annotations/classes.txt ADDED
@@ -0,0 +1,2 @@
1
+ 0 - Fundo
2
+ 1 - Objeto
DataSet/images/0cdf5b5d0ce1_01.jpg ADDED

Git LFS Details

  • SHA256: f48d6a7cbd4f1d7c4341143fe64f7ad4d328d6b426b1be3efb23180e8484cd45
  • Pointer size: 130 Bytes
  • Size of remote file: 19.1 kB
DataSet/masks/0cdf5b5d0ce1_01_mask.png ADDED

Git LFS Details

  • SHA256: 4e2d7ae86d5cd88d804d14cf6a8004876d0390aa09cb482edb81d3aae6d45705
  • Pointer size: 129 Bytes
  • Size of remote file: 4.27 kB
LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 André Costa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the “Software”), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9
+ of the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16
+ INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
17
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md CHANGED
@@ -1,3 +1,345 @@
1
- ---
2
- license: mit
3
- ---
1
+ # Image Segmentation with ResNet + U-Net
2
+
3
+ 💡 The ResNet + U-Net fusion combines deep, contextual vision (ResNet) with spatial fidelity and accuracy in the details (U-Net).
4
+ It is a versatile, powerful, and highly sensitive architecture - ideal for projects where every pixel matters.
5
+ The model shines in scenarios where the object is small, detailed, or textured, and the global scene context offers little help.
6
+ This makes it ideal for:
7
+ - Medical segmentation (e.g., tumors, vessels)
8
+ - Industrial defect inspection
9
+ - Embedded vision for robotics or quality control
10
+ ⚠️ However, this current version was trained on a **narrow-domain dataset**, collected under controlled indoor conditions — consistent lighting, high-contrast backgrounds, and fixed camera angles. As a result, its ability to generalize to open-world scenarios (e.g., outdoor images, different backgrounds) is limited.
11
+ **This is not a flaw of the model**, but a **natural reflection of its training data**. When retrained with more diverse and realistic datasets, this architecture has strong potential for robust performance in general-purpose segmentation tasks.
12
+
13
+
14
+ ## 📌 Class Convention
15
+ This project follows the standard:
16
+
17
+ - Class 0: Background
18
+ - Class 1: Segmented Object
19
+
20
+ All masks were converted to reflect this convention before training.
21
+
22
+ ## 🌐 Limitations and Considerations
23
+ This model was trained with images captured in a highly controlled environment: constant lighting, a clean background, and objects (cars) positioned on a rotating platform.
24
+
25
+ As a result, it achieves very high accuracy (IoU > 99%) when evaluated on images similar to those in the original dataset. However, its performance deteriorates significantly when exposed to images collected outdoors, with variations in light, angle, background, and perspective.
26
+
27
+ This limitation was expected and will be taken into account for future versions with more diverse datasets.
28
+
29
+ Good image:
30
+ ![Good image](./good_image.png "Segmentation under ideal studio lighting")
31
+
32
+ Bad image:
33
+ ![Bad image](./bad_image.png "Failure example with an open-world street background")
34
+
35
+
36
+ ## 🌟 Objective
37
+
38
+ To segment objects in custom grayscale images based on manual annotations, using a complete training pipeline, automated inference, and visual mask validation.
39
+
40
+ ## 🤖 Notes on Development
41
+
42
+ This project was born after many hours of experimentation, learning and progress driven by caffeine.
43
+ Unlike other projects I have participated in before, this one evolved incredibly quickly thanks to the support of artificial intelligence such as Copilot (Microsoft) and ChatGPT (OpenAI). Without a doubt, these are tools that are way ahead of their time.
44
+ As part of the experience of using and learning from these advanced AI tools, I always threw problems at both of them, to measure their performance and compare their responses. And to make the experience more fun, I kept an extremely formal dialogue with one and not at all formal with the other to see how they would react. And after a while, I reversed it, now being informal with the one that was previously formal and vice versa.
45
+ Big thanks to both copilots — one named Microsoft, the other simply GPT.
46
+ - Powered by: PyTorch, Gradio, OpenCV, Matplotlib, and Hugging Face Datasets
47
+
48
+
49
+ ## 📁 Project Structure
50
+ .
51
+ ├── run_app.py
52
+ ├── bad_image.png
53
+ ├── CHANGELOG.md
54
+ ├── checkpoints
55
+ │   ├── best_model.pt
56
+ │   └── modelo_completo.pth
57
+ ├── DataSet
58
+ │   ├── annotations
59
+ │   │   └── classes.txt
60
+ │   ├── ExtraTests
61
+ │   ├── images
62
+ │   └── masks
63
+ ├── dice_history.png
64
+ ├── run_evaluate.py
65
+ ├── good_image.png
66
+ ├── __init__.py
67
+ ├── iou_history.png
68
+ ├── LICENSE
69
+ ├── model_card.md
70
+ ├── .huggingface
71
+ │   └── model-index.yaml
72
+ ├── README.md
73
+ ├── report_file.txt
74
+ ├── requirements.txt
75
+ ├── scripts
76
+ │   ├── config.py
77
+ │   ├── Dataset
78
+ │   │   ├── ConvertFormat.py
79
+ │   │   ├── dataAugmentation.py
80
+ │   │   ├── deleteDuplicates.py
81
+ │   │   ├── getDS_HuggingFace.py
82
+ │   │   ├── getImages.py
83
+ │   │   ├── grays.py
84
+ │   │   ├── __init__.py
85
+ │   │   ├── mask_diagnosis.py
86
+ │   │   ├── masks.py
87
+ │   │   ├── Rename.py
88
+ │   │   ├── Resize.py
89
+ │   │   ├── TrainVal.py
90
+ │   │   └── validMasks.py
91
+ │   ├── __init__.py
92
+ │   └── Segmentation
93
+ │   ├── app.py
94
+ │   ├── augment.py
95
+ │   ├── diceLossCriterion.py
96
+ │   ├── evaluate_model.py
97
+ │   ├── flagged
98
+ │   ├── focalLoss.py
99
+ │   ├── Future
100
+ │   ├── __init__.py
101
+ │   ├── models.py
102
+ │   ├── segDS.py
103
+ │   └── train.py
104
+ ├── structure.txt
105
+ ├── training_loss.png
106
+ └── training_val_accuracy.png
107
+
108
+ ### 📁 Root Directory
109
+ | Name | Description |
110
+ |--------------------------|-----------------------------------------------------------------------------|
111
+ | `run_app.py` | Launcher script for the Gradio inference interface (wraps `scripts/Segmentation/app.py`) |
112
+ | `bad_image.png` | Example of a failed prediction (for benchmarking or documentation) |
113
+ | `good_image.png` | Example of a successful prediction (used for showcasing model quality) |
114
+ | `CHANGELOG.md` | History of changes and version updates |
115
+ | `checkpoints/` | Contains trained model files (`best_model.pt`, `modelo_completo.pth`) |
116
+ | `DataSet/` | Contains training images, masks, annotations, and extra test sets |
117
+ | `dice_history.png` | Visualization of Dice score progression during training |
118
+ | `iou_history.png` | Graph of Intersection over Union (IoU) evolution across epochs |
119
+ | `training_loss.png` | Plot showing model loss evolution throughout training |
120
+ | `training_val_accuracy.png` | Graph of validation accuracy during model training |
121
+ | `run_evaluate.py` | Evaluation script runnable from root — assesses model performance |
122
+ | `__init__.py` | Declares root as a Python package (if imported externally) |
123
+ | `LICENSE` | Legal terms for usage and redistribution |
124
+ | `model_card.md` | Technical summary of model details, performance, and intended use |
125
+ | `.huggingface/model-index.yaml` | Configuration file for Hugging Face model registry (optional export) |
126
+ | `README.md` | Main documentation file — project overview, usage, and setup guide |
127
+ | `report_file.txt` | Training log and report output saved during execution |
128
+ | `requirements.txt` | List of dependencies needed for running the project |
129
+ | `scripts/` | Main logic for training, evaluation, dataset preparation, and modeling |
130
+ | `structure.txt` | Manual export of the folder structure, used as reference or debug aid |
131
+
132
+ ### 📁 DataSet/
133
+ | Name | Description |
134
+ |-------------------|---------------------------------------------------------------------------------|
135
+ | `annotations/` | Contains `classes.txt`, defining class labels used in segmentation |
136
+ | `images/` | Input images used for training and evaluation |
137
+ | `masks/` | Segmentation masks aligned with input images |
138
+ | `ExtraTests/` | Optional dataset with additional test cases for generalization assessment |
139
+
140
+ ### 📁 scripts/
141
+ | Name | Description |
142
+ |----------------------|-------------------------------------------------------------------------------|
143
+ | `config.py` | Configuration module holding paths, flags, and hyperparameters |
144
+ | `__init__.py` | Declares `scripts/` as an importable Python module |
145
+
146
+
147
+ ### 📁 scripts/Dataset/
148
+ | Name | Description |
149
+ |------------------------|-----------------------------------------------------------------------------|
150
+ | `ConvertFormat.py` | Converts image or annotation formats (e.g. from JPG to PNG, or COCO to mask)|
151
+ | `dataAugmentation.py` | Applies offline augmentations to images or masks |
152
+ | `deleteDuplicates.py` | Detects and removes duplicate samples |
153
+ | `getDS_HuggingFace.py` | Downloads datasets from Hugging Face 🤗 |
154
+ | `getImages.py` | Image retrieval or organization from storage |
155
+ | `grays.py` | Converts images to grayscale |
156
+ | `mask_diagnosis.py` | Validates and diagnoses potential issues in masks |
157
+ | `masks.py` | Performs manipulation or binarization of segmentation masks |
158
+ | `Rename.py` | Batch renaming utility to standardize filenames |
159
+ | `Resize.py` | Resizes images and masks to uniform dimensions |
160
+ | `TrainVal.py` | Performs dataset train/validation splitting |
161
+ | `validMasks.py` | Checks for validity in mask formatting and values |
162
+ | `__init__.py` | Declares `Dataset/` as a Python package |
163
+
164
+
165
+ ### 📁 scripts/Segmentation/
166
+ | Name | Description |
167
+ |------------------------|-----------------------------------------------------------------------------|
168
+ | `app.py` | Local interface for model inference — CLI or GUI |
169
+ | `augment.py` | Online augmentations and Test-Time Augmentation (TTA) |
170
+ | `diceLossCriterion.py` | Custom Dice Loss implementation for segmentation |
171
+ | `focalLoss.py` | Custom Focal Loss implementation to handle class imbalance |
172
+ | `evaluate_model.py` | Model evaluator with metrics like IoU, Dice, and pixel accuracy |
173
+ | `models.py` | Contains neural network architecture (e.g. UNet based on ResNet) |
174
+ | `segDS.py` | Dataset class for segmentation tasks, loading images and masks |
175
+ | `train.py` | Main training script with logging, plotting, checkpointing, and early stop |
176
+ | `Future/` | Experimental code including auto hyperparameter tuning |
177
+ | `flagged/` | Optional output folder for flagged evaluations or debug samples |
178
+ | `__init__.py` | Declares `Segmentation/` as a Python package |
179
+
180
+ ## Dataset
181
+ This project uses data from the [CV Image Segmentation Dataset](https://www.kaggle.com/datasets/antoreepjana/cv-image-segmentation), which provides paired images and masks for semantic segmentation tasks.
182
+ The dataset contains several distinct data subsets.
183
+ Only the Carvana car images (Kaggle Carvana Car Mask Segmentation) were used; this subset was the dataset used to test the project.
184
+
185
+ The data subset used for the project was pre-processed by running the project's scripts in the following order (see the sketch after the list):
186
+ 1 - Run getImages.py #Or use other data sources.
187
+ 2 - Visually inspect the collected images.
188
+ 3 - Run deleteDuplicates.py
189
+ 4 - Run ConvertFormat.py
190
+ 5 - Run Resize.py (Must be run for both the image and mask directories).
191
+ 6 - Run grays.py (Must be run for both the image and mask directories).
192
+ 7 - Make annotations.
193
+ 8 - Run masks.py
194
+ 9 - Run validMasks.py
195
+ 10 - Run TrainVal.py
196
+
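+ A sketch of that run order from the repository root (the invocation style is an assumption; each script reads its own paths and configuration):
+
+ ```bash
+ python scripts/Dataset/getImages.py        # or use other data sources
+ python scripts/Dataset/deleteDuplicates.py
+ python scripts/Dataset/ConvertFormat.py
+ python scripts/Dataset/Resize.py           # run for both the images/ and masks/ directories
+ python scripts/Dataset/grays.py            # run for both the images/ and masks/ directories
+ # make annotations, then:
+ python scripts/Dataset/masks.py
+ python scripts/Dataset/validMasks.py
+ python scripts/Dataset/TrainVal.py
+ ```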
197
+ ---
198
+
199
+ ## ⚙️ Model
200
+
201
+ * Architecture: ResNet encoder + U-Net decoder
202
+ * Input: 1-channel grayscale, resized to 512×512
203
+ * Loss: Cross Entropy Loss with class weighting
204
+ * Optimizer: Adam
205
+ * Scheduler: StepLR with decay
206
+ * Training duration: configurable (default: 400 epochs)
207
+ * Early Stopping: based on accuracy stagnation
208
+ * Checkpoints: saved every N epochs + best model saved
209
+
210
+ Training script: `scripts/Segmentation/train.py`
211
+ Evaluation scripts:
212
+
213
+ * `scripts/Segmentation/evaluate_model.py`: Batch evaluation over image folders
214
+ * `scripts/Segmentation/app.py`: Gradio demo for interactive inference
215
+
216
+ * `run_app.py`: Wrapper script to launch the Gradio interface from the root directory (calls scripts/Segmentation/app.py internally)
217
+ * `run_evaluate.py`: wrapper script to launch the general pre-testing script from the root directory (calls scripts/Segmentation/evaluate_model.py internally)
218
+ 📄 The model is documented and registered via model-index.yaml for proper listing on Hugging Face Hub.
219
+
220
+ ---
221
+
222
+ ## 📈 Evaluation
223
+
224
+ Quantitative metrics include:
225
+
226
+ * Intersection over Union (IoU)
227
+ * Dice coefficient
228
+ * Accuracy, Precision, Recall
229
+ * Balanced Accuracy and MCC
230
+
231
+ Visual inspection is supported via overlay masks in the ExtraTests/ folder.
232
+
233
+ ![training validation accuracy](./training_val_accuracy.png)
234
+
235
+ ![training loss](./training_loss.png)
236
+
237
+ ![iou_history](./iou_history.png)
238
+
239
+ ![dice_history](./dice_history.png)
240
+
241
+ ---
242
+
243
+ ## 🔬 Future Work
244
+
245
+ The directory `scripts/Segmentation/Future/` includes planned extensions for embedded deployment:
246
+
247
+ * `train_embedded_explicit_model.py`: A simplified and modular training script for generating lightweight ONNX models.
248
+ Note: This script was not executed or validated during this certification phase.
249
+
250
+ ---
251
+
252
+ ## 🏗 Deployment Options
253
+
254
+ This project includes two scripts for model evaluation:
255
+
256
+ ### 🧪 Batch Evaluation Script (`evaluate_model.py`)
257
+
258
+ Use this script to run the model on an entire directory of test images. Ideal for debugging, validation, and quantitative analysis.
259
+
260
+ ```bash
261
+ python evaluate_model.py --input ./your-test-images/
262
+ ```
263
+
264
+ You can modify this script to save prediction masks, compute metrics (IoU, pixel accuracy), or visualize results in batch.
265
+
266
+ ---
267
+
268
+ ### 🌐 Interactive Web Demo (`app.py`)
269
+
270
+ This script provides an interactive interface using [Gradio](https://www.gradio.app/). It's designed for easy deployment and model demonstration, such as on Hugging Face Spaces.
271
+
272
+ To launch the web app locally:
273
+
274
+ ```bash
275
+ python app.py
276
+ ```
277
+
278
+ Or try it online (if hosted):
279
+
280
+ 👉 [Live demo on Hugging Face Spaces](https://huggingface.co/spaces/seu-usuario/seu-modelo) *TODO: (link will be updated after submission)*
281
+
282
+
283
+ This interface allows anyone to upload an image and instantly see the segmentation results — no installation required.
284
+
285
+ ---
286
+
287
+ 📌 **Tip**: Use `evaluate_model.py` during development and testing, and `app.py` for sharing and showcasing your model.
288
+
289
+ ---
290
+
291
+ ## 🏆 Certification Context
292
+
293
+ This repository was submitted for the Hugging Face Computer Vision Certification and is built upon reproducibility, modularity, dataset transparency, and technical rigor.
294
+
295
+ ---
296
+
297
+ ## 📄 License
298
+
299
+ This project is licensed under the MIT License.
300
+ Dataset usage must comply with the original Kaggle dataset license terms.
301
+
302
+ ---
303
+
304
+ ## 🔮 Future improvements
305
+
306
+ Some steps are already planned for the project's evolution:
307
+
308
+ * Architecture refinement: test lighter variants (e.g. ResNet18, MobileNetV3) to compare performance in embedded environments.
309
+ * Training with data augmentation: use Data Augmentation strategies (rotation, noise, scale, brightness) to increase model robustness.
310
+ * Cross-validation: include a cross-validation strategy to increase confidence in metrics.
311
+ * Conversion to ONNX/TensorRT: prepare an exportable version of the model for inference on edge devices.
312
+ * Deployment on specific hardware: test inference on ESP32-S3 or Raspberry Pi using a simplified pipeline with float16.
313
+ * Visualization interface: create a simple script or panel that allows you to upload an image and view the segmentation live.
314
+
315
+ These improvements will be implemented as the project progresses, keeping the focus on lightness, modularity, and real applicability in computer vision with monochromatic images.
316
+
317
+ ---
318
+
319
+ ## 🌟 Final thoughts: why this certification matters
320
+
321
+ This project represents more than just completing a technical challenge. For me, it is the fulfillment of a long-held dream — to earn a professional certification that values knowledge, practice, and the ability to solve real-world problems, rather than just familiarity with specific versions of tools or frameworks.
322
+
323
+ For many years, I experienced the frustrating side of commercial certifications that felt more like traps than opportunities: exams based on obsolete technologies, questionable application centers, and mechanisms that created more obstacles than recognition. That never represented who I am — or what I am capable of building.
324
+
325
+ This certification, promoted by Hugging Face, is different. It validates true competencies in machine learning and computer vision based on a real-world project, executed end-to-end. It is a type of recognition that carries technical, ethical, and personal value.
326
+
327
+ That is why it is not “just another delivery.” It is a turning point.
328
+
329
+
330
+ ---
331
+
332
+ ## 🌟 Important notes…
333
+
334
+ 1) The IDE used in the project was Eclipse (https://eclipseide.org/) with the PyDev module (https://www.pydev.org/). In this environment it was necessary to add the project path to PyDev's PYTHONPATH so that the imports of some files, such as config.py, resolve correctly.
335
+
336
+ 2) The model is being trained with the "train.py" script.
337
+ However, there is a second training script called "cyber_train.py."
338
+ This is an empirical test I'm conducting. A little research of my own.
339
+ In "train," the hyperparameters are chosen manually.
340
+ In "cyber_train," the script will run 25 short training sessions, each lasting 5 epochs, to test the hyperparameters within the established limits and determine the best ones. Then, the actual training will be performed using the best hyperparameters detected.
341
+ And where does my empirical research come in?
342
+ I'm training first with the simplest version of the script, measuring how long it takes me to arrive at a model with a good accuracy percentage.
343
+ Once this is done, I'll run the automated version...
344
+ Then, I'll compare which of the two models performed better and how long it took me to achieve each one...
345
+ This will serve as a reference for more accurate trade-offs in future projects.
__init__.py ADDED
File without changes
bad_image.png ADDED

Git LFS Details

  • SHA256: 59fdd2c69c6fd9c5c63e64c567e97082de4ddee9975a94e3974a3d885f02a413
  • Pointer size: 131 Bytes
  • Size of remote file: 232 kB
checkpoints/best_model.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:196deed7297ecec59a59b61d6f93fb85655bc9130b4a3360c2a45117f12296ed
3
+ size 188378594
dice_history.png ADDED

Git LFS Details

  • SHA256: 4eeb398651703b9c3421ca7450077954699257d8c0b87d1b04ce02aedf8f76a6
  • Pointer size: 130 Bytes
  • Size of remote file: 39.3 kB
good_image.png ADDED

Git LFS Details

  • SHA256: 0c59a27058fea8007b1ebccfe2e68099c86c81d4075f279ad8df7dbcc2f11f73
  • Pointer size: 131 Bytes
  • Size of remote file: 147 kB
iou_history.png ADDED

Git LFS Details

  • SHA256: 84fcfa154f6091b8f9fcaaac86c02ca12d2f464a0d170263f506573e2093a154
  • Pointer size: 130 Bytes
  • Size of remote file: 40.5 kB
model_card.md ADDED
@@ -0,0 +1,122 @@
1
+ # 📑 Model Card – Segmentation
2
+
3
+ ## 🧾 Overview
4
+
5
+ 💡 **ResNet + U-Net fusion** combines deep and contextual vision (ResNet) with spatial fidelity and precision in details (U-Net).
6
+ It is a versatile, powerful, and high-sensitivity architecture — ideal for projects where **every pixel matters**.
7
+
8
+ The model excels in scenarios where the object is **small, detailed, or textured**, and where the **global scene context offers little help**.
9
+
10
+ This makes it ideal for:
11
+ - Medical segmentation (e.g., tumors, vessels)
12
+ - Industrial defect inspection
13
+ - Embedded vision for robotics or precision tasks
14
+
15
+ ⚠️ However, this specific version was trained on a **narrow-domain dataset**, captured under **controlled indoor conditions**: consistent lighting, high-contrast backgrounds, and fixed camera angles.
16
+ As a result, its ability to generalize to open-world scenarios (e.g., outdoor environments, variable backgrounds) is limited.
17
+
18
+ **This is not a flaw in the model**, but a **natural reflection of the training data**.
19
+ When retrained with more diverse and realistic datasets, this architecture is highly capable of delivering robust performance across a wide range of segmentation tasks.
20
+
21
+ ---
22
+
23
+ ## ☕ Behind the Scenes
24
+
25
+ This certification project was built one commit at a time — powered by curiosity, long debugging sessions, strategic doses of caffeine, and great support from **Microsoft Copilot** and **ChatGPT (OpenAI)**, whose insights were essential in structuring the segmentation pipeline and planning its embedded future.
26
+
27
+ > "Every time the model tries to segment, the square figure resurfaces. Not as an error, but as a reminder: deep learning can be quite shallow when the curse of imperfect geometry sets in.
28
+ > And even when all the code is rewritten, the world is realigned, and optimism rises again… there she is: the misshapen quadratic figure.
29
+ > Unfazed, unshakeable, perhaps even moved by her own stubbornness. She's not a bug — she's a character."
30
+
31
+ ---
32
+
33
+ ## 🗂️ Dataset
34
+
35
+ This model was trained using a subset of the [CV Image Segmentation Dataset](https://www.kaggle.com/datasets/antoreepjana/cv-image-segmentation), available on Kaggle.
36
+
37
+ - **Author**: Antoreep Jana
38
+ - **License**: For educational and non-commercial use
39
+ - **Content**: 300+ annotated images for binary segmentation
40
+ - **Preprocessing**: All images resized to 512×512 and converted to grayscale
41
+
42
+ ⚠️ *Only a filtered and preprocessed subset (related to car images) was used for this version.*
43
+ The dataset contains several distinct data subsets.
44
+ Only the Carvana car images (Kaggle Carvana Car Mask Segmentation) were used; this subset was the dataset used to test the project.
45
+
46
+ ---
47
+
48
+ ## ⚙️ Model Architecture
49
+
50
+ - **Encoder**: ResNet-50 (pretrained, adapted for 1-channel input)
51
+ - **Decoder**: U-Net with skip connections and bilinear upsampling
52
+ - **Input**: Grayscale, 512×512
53
+ - **Output**: Binary segmentation mask (background vs. object)
54
+ - **Loss**: Composite of `CrossEntropyLoss + DiceLoss`
55
+ - **Framework**: PyTorch
56
+
57
+ ---
58
+
59
+ ## 📊 Evaluation Metrics
60
+
61
+ - Pixel Accuracy (train/val)
62
+ - Dice Coefficient
63
+ - CrossEntropy Loss
64
+ - Class-weighted loss balancing
65
+ - *(IoU, MCC, Precision/Recall planned for future integration)*
66
+
67
+ 🧪 Evaluation performed using `evaluate_model.py`
68
+
69
+ ---
70
+
71
+ ## ⚠️ Limitations
72
+
73
+ This model achieves excellent results when tested on **studio-like images**: consistent lighting, neutral backgrounds, and static perspectives.
74
+
75
+ However, performance decreases on **unseen outdoor scenarios** (e.g., cars on the street, parking lots) — where background noise, lighting variation, and camera angle impact results.
76
+
77
+ ➡️ This **limitation is dataset-induced**, not architectural.
78
+ When trained on more realistic data, this model generalizes well due to its high sensitivity to texture and spatial structure.
79
+
80
+ ---
81
+
82
+ ## 🚀 Intended Use
83
+
84
+ Best suited for applications where conditions are similar to the training set, such as:
85
+
86
+ - Quality control in automotive photography studios
87
+ - Automated documentation of vehicles in inspection booths
88
+ - Offline image processing for structured, grayscale datasets
89
+
90
+ ---
91
+
92
+ ## 💡 Recommendations
93
+
94
+ To deploy in open-world environments (e.g., mobile robots, outdoor cameras), it is strongly recommended to **retrain or fine-tune** the model using a **more heterogeneous dataset**.
95
+
96
+ ---
97
+
98
+ ## 🔬 Planned Extensions
99
+
100
+ The following experimental modules are under active development and may be integrated in future releases:
101
+
102
+ 1️⃣ **Embedded Deployment Pipeline**
103
+ - Export to ONNX format with float16 precision
104
+ - C++ reimplementation targeting edge devices such as ESP32-S3 and STM32H7
105
+ - Lightweight modular training script:
106
+ `scripts/Segmentation/Future/train_embedded_explicit_model.py`
107
+ *Status: Experimental – not validated in this version*
108
+
109
+ 2️⃣ **Automated Hyperparameter Optimization**
110
+ - Training script that performs automatic hyperparameter search and tuning before final training
111
+ - Designed to improve efficiency and reduce manual configuration
112
+ - Script:
113
+ `scripts/Segmentation/Future/cyber_train.py`
114
+ *Status: Experimental – not validated in this version*
115
+
116
+
117
+ ---
118
+
119
+ ## 🪪 Licensing
120
+
121
+ - **Code**: MIT License
122
+ - **Dataset**: Attribution required (as per Kaggle contributor)
report_file.txt ADDED
@@ -0,0 +1,56 @@
1
+ 2.3.1+cu121
2
+ 12.1
3
+ ['sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_90']
4
+
5
+
6
+
7
+ Starting training on: 2025-07-22 09:53:58.111068
8
+ Epoch 1/100, Loss: 318.6268, Train Acc: 0.8727, Val Acc: 0.9918, IoU: 0.9758, Dice: 0.9806
9
+ 🔸 New best model at epoch 1 (acc: 0.8727) — saving best_model.pt
10
+ Epoch 2/100, Loss: 269.4433, Train Acc: 0.8604, Val Acc: 0.9852, IoU: 0.9565, Dice: 0.9645
11
+ Epoch 3/100, Loss: 38.0319, Train Acc: 0.8820, Val Acc: 0.9817, IoU: 0.9477, Dice: 0.9575
12
+ 🔸 New best model at epoch 3 (acc: 0.8820) — saving best_model.pt
13
+ Epoch 4/100, Loss: 31.6814, Train Acc: 0.8829, Val Acc: 0.9211, IoU: 0.8126, Dice: 0.8404
14
+ 🔸 New best model at epoch 4 (acc: 0.8829) — saving best_model.pt
15
+ Epoch 5/100, Loss: 26.3503, Train Acc: 0.8828, Val Acc: 0.9449, IoU: 0.8603, Dice: 0.8827
16
+ Epoch 6/100, Loss: 21.8011, Train Acc: 0.8823, Val Acc: 0.9263, IoU: 0.8224, Dice: 0.8491
17
+ Epoch 7/100, Loss: 19.5440, Train Acc: 0.8828, Val Acc: 0.9199, IoU: 0.8102, Dice: 0.8381
18
+ Epoch 8/100, Loss: 28.5497, Train Acc: 0.8818, Val Acc: 0.9641, IoU: 0.9036, Dice: 0.9204
19
+ Epoch 9/100, Loss: 23.4882, Train Acc: 0.8827, Val Acc: 0.9236, IoU: 0.8172, Dice: 0.8444
20
+ Epoch 10/100, Loss: 18.1255, Train Acc: 0.8838, Val Acc: 0.9187, IoU: 0.8079, Dice: 0.8361
21
+ 🔸 New best model at epoch 10 (acc: 0.8838) — saving best_model.pt
22
+ Epoch 11/100, Loss: 16.7196, Train Acc: 0.8844, Val Acc: 0.9135, IoU: 0.7985, Dice: 0.8278
23
+ 🔸 New best model at epoch 11 (acc: 0.8844) — saving best_model.pt
24
+ Epoch 12/100, Loss: 35.1093, Train Acc: 0.8820, Val Acc: 0.9400, IoU: 0.8503, Dice: 0.8741
25
+ Epoch 13/100, Loss: 17.0448, Train Acc: 0.8827, Val Acc: 0.9333, IoU: 0.8365, Dice: 0.8618
26
+ Epoch 14/100, Loss: 15.8128, Train Acc: 0.8842, Val Acc: 0.9202, IoU: 0.8110, Dice: 0.8391
27
+ Epoch 15/100, Loss: 14.9408, Train Acc: 0.8841, Val Acc: 0.9278, IoU: 0.8256, Dice: 0.8521
28
+ Epoch 16/100, Loss: 20.7117, Train Acc: 0.8830, Val Acc: 0.9222, IoU: 0.8149, Dice: 0.8426
29
+ Epoch 17/100, Loss: 15.2986, Train Acc: 0.8838, Val Acc: 0.9214, IoU: 0.8132, Dice: 0.8410
30
+ Epoch 18/100, Loss: 13.8610, Train Acc: 0.8831, Val Acc: 0.9273, IoU: 0.8245, Dice: 0.8511
31
+ Epoch 19/100, Loss: 13.5508, Train Acc: 0.8831, Val Acc: 0.9360, IoU: 0.8418, Dice: 0.8665
32
+ Epoch 20/100, Loss: 12.6651, Train Acc: 0.8823, Val Acc: 0.9171, IoU: 0.8050, Dice: 0.8336
33
+ Epoch 21/100, Loss: 12.4524, Train Acc: 0.8844, Val Acc: 0.9254, IoU: 0.8208, Dice: 0.8478
34
+ 🔸 New best model at epoch 21 (acc: 0.8844) — saving best_model.pt
35
+ Epoch 22/100, Loss: 11.9378, Train Acc: 0.8848, Val Acc: 0.9184, IoU: 0.8073, Dice: 0.8356
36
+ 🔸 New best model at epoch 22 (acc: 0.8848) — saving best_model.pt
37
+ Epoch 23/100, Loss: 11.6401, Train Acc: 0.8844, Val Acc: 0.9239, IoU: 0.8178, Dice: 0.8449
38
+ Epoch 24/100, Loss: 11.1857, Train Acc: 0.8835, Val Acc: 0.9269, IoU: 0.8237, Dice: 0.8505
39
+ Epoch 25/100, Loss: 25.1642, Train Acc: 0.8828, Val Acc: 0.9221, IoU: 0.8147, Dice: 0.8424
40
+ Epoch 26/100, Loss: 13.8112, Train Acc: 0.8839, Val Acc: 0.9135, IoU: 0.7987, Dice: 0.8280
41
+ Epoch 27/100, Loss: 11.8389, Train Acc: 0.8833, Val Acc: 0.9147, IoU: 0.8008, Dice: 0.8299
42
+ Epoch 28/100, Loss: 11.0536, Train Acc: 0.8832, Val Acc: 0.9105, IoU: 0.7930, Dice: 0.8229
43
+ Epoch 29/100, Loss: 10.6666, Train Acc: 0.8847, Val Acc: 0.9156, IoU: 0.8024, Dice: 0.8313
44
+ Epoch 30/100, Loss: 10.2207, Train Acc: 0.8834, Val Acc: 0.9165, IoU: 0.8042, Dice: 0.8331
45
+ Epoch 31/100, Loss: 12.5192, Train Acc: 0.8848, Val Acc: 0.9535, IoU: 0.8794, Dice: 0.8996
46
+ Epoch 32/100, Loss: 10.1232, Train Acc: 0.8844, Val Acc: 0.9356, IoU: 0.8412, Dice: 0.8660
47
+
48
+ ⛔ Early stopping triggered at epoch 32
49
+ Completing training on: 2025-07-22 19:30:39.892257
50
+ Total training execution time = 9:36:41.781189
51
+
52
+ Training Summary:
53
+ Min Loss: 10.1232
54
+ Max Loss: 318.6268
55
+ Loss final: 10.1232
56
+ Best Val Acc: 0.9918
requirements.txt ADDED
@@ -0,0 +1,26 @@
1
+ #albumentations==1.4.14
2
+ albumentations==2.0.8
3
+ #datasets==2.18.0
4
+ datasets==3.6.0
5
+ gradio==5.38.2
6
+ ImageHash==4.3.2
7
+ matplotlib==3.10.3
8
+ #numpy==1.21.5
+ numpy==1.26.4
9
+ #opencv_contrib_python==4.6.0.66
10
+ opencv_contrib_python==4.9.0.80
11
+ opencv_contrib_python_headless==4.9.0.80
12
+ #opencv_python==4.6.0.66
13
+ opencv_python==4.9.0.80
14
+ opencv_python_headless==4.10.0.84
15
+ pandas==2.0.3
16
+ #Pillow==9.0.1
17
+ Pillow==11.3.0
18
+ scikit_learn==1.4.0
19
+ simple_image_download==0.2
20
+ torch==2.3.1
21
+ #torch==2.1.2
22
+ #torch==1.11.0
23
+ torchvision==0.18.1
24
+ #torchvision==0.16.2
25
+ #torchvision==0.12.0
26
+ warmup_scheduler==0.3
run_app.py ADDED
@@ -0,0 +1,9 @@
1
+ import os
2
+ import sys
3
+
4
+ sys.path.append(os.path.abspath(os.path.dirname(__file__)))
5
+
6
+ from scripts.Segmentation import app
7
+
8
+ if __name__ == '__main__':
9
+ app.demo.launch(share=True)
run_evaluate.py ADDED
@@ -0,0 +1,9 @@
1
+ import os
2
+ import sys
3
+
4
+ sys.path.append(os.path.abspath(os.path.dirname(__file__)))
5
+
6
+ from scripts.Segmentation import evaluate_model
7
+
8
+ if __name__ == '__main__':
9
+ evaluate_model.run()
scripts/Dataset/ConvertFormat.py ADDED
@@ -0,0 +1,32 @@
1
+ #encoding=utf8
2
+ import os
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ target = config.masks
7
+ extension_destination = '.png'
8
+
9
+ print('Starting processing...')
10
+ files = os.listdir(target)
11
+ for fileOne in files:
12
+ nameFile, extension = os.path.splitext(fileOne)
13
+ print('Testing: ', fileOne)
14
+ if((extension != extension_destination) and
15
+ (extension != '.json')):
16
+ if(extension == ''):
17
+ extension = '....'
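+ # '....' is a sentinel for files with no extension: the replace below is a no-op, so the target extension is simply appended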
18
+
19
+ print('\nRenaming: ' + str(fileOne))
20
+ origem = os.path.join(target, fileOne)
21
+
22
+ destino = fileOne.replace(extension, extension_destination)
23
+ if(extension == '....'):
24
+ destino = destino + extension_destination
25
+ destino = os.path.join(target, destino)
26
+
27
+ im = Image.open(origem).convert('RGB')
28
+ im.save(destino, 'PNG')
29
+ os.remove(origem)
30
+ print('-----------------------------------------')
31
+
32
+ print('\n\nCompleted...')
scripts/Dataset/Rename.py ADDED
@@ -0,0 +1,22 @@
1
+ #encoding=utf8
2
+ import os
3
+ import scripts.config as config
4
+
5
+ extension = '.jpg'
6
+
7
+ print('Starting processing...')
8
+ contador = 0
9
+ paths = [os.path.join(config.images, nome) for nome in os.listdir(config.images)]
10
+ files = [arq for arq in paths if os.path.isfile(arq)]
11
+ jpgs = [arq for arq in files if arq.lower().endswith(extension)]
12
+ for img in jpgs:
13
+ imagePath = str(img)
14
+
15
+ oldName = imagePath.replace(config.images, '')
16
+ print('\n ' + oldName)
17
+
18
+ newName = config.images + str(contador) + extension
19
+ os.rename(imagePath, newName)
20
+ contador = contador + 1
21
+
22
+ print('\n\nCompleted...')
scripts/Dataset/Resize.py ADDED
@@ -0,0 +1,25 @@
1
+ #encoding=utf8
2
+ import os
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ def resize_all(target, extensao):
7
+ paths = [os.path.join(target, nome) for nome in os.listdir(target)]
8
+ files = [arq for arq in paths if os.path.isfile(arq)]
9
+ files = [arq for arq in files if arq.lower().endswith(extensao)]
10
+ for img in files:
11
+ imagePath = str(img)
12
+ print(' ' + imagePath)
13
+ try:
14
+ image = Image.open(imagePath)
15
+ resized = image.resize((config.width, config.height), Image.LANCZOS)
16
+ resized.save(imagePath)
17
+ except Exception as e:
18
+ print(f" {imagePath}: {e}")
19
+
20
+ print('Starting processing...')
21
+
22
+ resize_all(config.images, '.jpg')
23
+ resize_all(config.masks, '.png')
24
+
25
+ print('\n\nCompleted...')
scripts/Dataset/TrainVal.py ADDED
@@ -0,0 +1,35 @@
1
+ #encoding=utf8
2
+ import os
3
+ import pandas as pd
4
+ import scripts.config as config
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ nameTrain = config.source + 'train.txt'
8
+ nameVal = config.source + 'val.txt'
9
+ percentual = 0.7
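+ # 70/30 train/validation split; each output line is a tab-separated '<image path>\t<mask path>' pair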
10
+
11
+ def nomeOrigem(originalName):
12
+ nome = os.path.join(config.images, originalName)
13
+ return nome
14
+
15
+ def nomeMascara(originalName):
16
+ nome = os.path.join(config.masks, originalName)
17
+ nome = nome.replace('.jpg', '.png')
18
+ return nome
19
+
20
+ print('Starting processing...')
21
+ paths = [os.path.join(config.images, nome) for nome in os.listdir(config.images)]
22
+ files = [arq for arq in paths if os.path.isfile(arq)]
23
+ files = [arq for arq in files if arq.lower().endswith('.jpg')]
24
+
25
+ df = pd.DataFrame({'nome_original': files})
26
+
27
+ df['newNameA'] = [nomeOrigem(nome) for i, nome in enumerate(files)]
28
+ df['newNameB'] = [nomeMascara(nome) for i, nome in enumerate(files)]
29
+
30
+ grupo1, grupo2 = train_test_split(df, train_size=percentual, random_state=42)
31
+
32
+ grupo1[['newNameA', 'newNameB']].to_csv(nameTrain, sep='\t', index=False, header=False)
33
+ grupo2[['newNameA', 'newNameB']].to_csv(nameVal, sep='\t', index=False, header=False)
34
+
35
+ print("Groups saved in '" + nameTrain + "' and '" + nameVal + "'")
scripts/Dataset/__init__.py ADDED
File without changes
scripts/Dataset/dataAugmentation.py ADDED
@@ -0,0 +1,48 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ import albumentations as A
6
+ import scripts.config as config
7
+
8
+ input_img_dir = config.images
9
+ out_img_dir = config.images #+ "/images_aug"
10
+
11
+ input_mask_dir = config.masks
12
+ out_mask_dir = config.masks #+ "/masks_aug"
13
+
14
+ print('Starting...')
15
+
16
+ os.makedirs(out_img_dir, exist_ok=True)
17
+ os.makedirs(out_mask_dir, exist_ok=True)
18
+
19
+ transformations = [
20
+ ("flip", A.HorizontalFlip(p=1.0)),
21
+ ("rot15", A.Rotate(limit=15, p=1.0)),
22
+ ("contrast", A.RandomBrightnessContrast(p=1.0)),
23
+ ]
24
+
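+ # each image/mask pair gets one deterministic copy per transformation (flip, rot15, contrast), saved alongside the originals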
25
+ for fname in os.listdir(input_img_dir):
26
+ if not fname.endswith(".jpg"): continue
27
+ base = fname.replace(".jpg", "")
28
+
29
+ img_path = os.path.join(input_img_dir, fname)
30
+ mask_path = os.path.join(input_mask_dir, base + "_mask.png")
31
+
32
+ img = np.array(Image.open(img_path).convert("L"))
33
+ mask = np.array(Image.open(mask_path).convert("L"))
34
+ mask = (mask > 127).astype('uint8')
35
+
36
+ for name, tf in transformations:
37
+ aug = A.Compose([tf])
38
+ augmented = aug(image=img, mask=mask)
39
+ img_aug = augmented['image']
40
+ mask_aug = augmented['mask']
41
+
42
+ img_out = os.path.join(out_img_dir, f"{base}_aug_{name}.jpg")
43
+ mask_out = os.path.join(out_mask_dir, f"{base}_aug_{name}_mask.png")
44
+
45
+ cv2.imwrite(img_out, img_aug)
46
+ cv2.imwrite(mask_out, mask_aug * 255)
47
+
48
+ print('Completed...')
scripts/Dataset/deleteDuplicates.py ADDED
@@ -0,0 +1,32 @@
1
+ import os
2
+ import imagehash
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ hashes = {}
7
+ duplicates = []
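+ # perceptual hashing: visually identical images map to the same phash, so later occurrences are flagged as duplicates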
8
+
9
+ paths = [os.path.join(config.images, nome) for nome in os.listdir(config.images)]
10
+ files = [arq for arq in paths if os.path.isfile(arq)]
11
+ fileNames = [arq for arq in files if arq.lower().endswith('.jpg')]
12
+ for filename in fileNames:
13
+ filepath = filename
14
+ try:
15
+ with Image.open(filepath) as img:
16
+ img_hash = imagehash.phash(img) # Or: dhash(img), average_hash(img)
17
+ if img_hash in hashes:
18
+ print(f"Duplicate detected: {filename} ≈ {hashes[img_hash]}")
19
+ duplicates.append(filepath)
20
+ else:
21
+ hashes[img_hash] = filename
22
+ except Exception as e:
23
+ print(f"Error processing {filename}: {e}")
24
+
25
+ for dup in duplicates:
26
+ try:
27
+ os.remove(dup)
28
+ print(f"Removed: {dup}")
29
+ except Exception as e:
30
+ print(f"Error removing {dup}: {e}")
31
+
32
+ print(f"\nTotal duplicates removed: {len(duplicates)}")
scripts/Dataset/getDS_HuggingFace.py ADDED
@@ -0,0 +1,17 @@
 
1
+ import os
2
+ from datasets import load_dataset
3
+
4
+ dataset_name = 'Onegafer/vehicle_segmentation'
5
+
6
+ split = "train" # or "test", "validation", etc.
7
+ print(f"🔽 Downloading dataset: {dataset_name}...")
8
+ dataset = load_dataset(dataset_name, split=split)
9
+
10
+ print(f"✅ Dataset loaded with {len(dataset)} samples.")
11
+ print("Example:", dataset[0])
12
+
13
+ output_dir = '/home/pi/Deposito/Projetos/Meus/CertificacaoHuggingFace/fontes/TempDataSet/'
14
+ os.makedirs(output_dir, exist_ok=True)
15
+ dataset.save_to_disk(os.path.join(output_dir, dataset_name.replace("/", "_")))
16
+
17
+ print(f"💾 Dataset saved to: {output_dir}/{dataset_name.replace('/', '_')}")
scripts/Dataset/getImages.py ADDED
@@ -0,0 +1,17 @@
1
+ import os
2
+ import scripts.config as config
3
+ from simple_image_download import simple_image_download as simp
4
+
5
+ termo_busca = "garrote"
6
+ pasta_destino = config.tempImages
7
+ quantidade = 200
8
+
9
+
10
+ if not os.path.exists(pasta_destino):
11
+ os.makedirs(pasta_destino)
12
+
13
+ response = simp.simple_image_download
14
+ response().download(termo_busca, quantidade)
15
+
16
+
17
+ print(f"Download complete! Images saved to: {pasta_destino}")
scripts/Dataset/grays.py ADDED
@@ -0,0 +1,23 @@
1
+ #encoding=utf8
2
+ import os
3
+ from PIL import Image
4
+ import scripts.config as config
5
+
6
+ def convert_all(target, extensao):
7
+ paths = [os.path.join(target, nome) for nome in os.listdir(target)]
8
+ files = [arq for arq in paths if os.path.isfile(arq)]
9
+ arquivos = [arq for arq in files if arq.lower().endswith(extensao)]
10
+ for img in arquivos:
11
+ pathImage = str(img)
12
+ print(' ' + pathImage)
13
+
14
+ image = Image.open(pathImage)
15
+ image = image.convert("L")
16
+ image.save(pathImage)
17
+
18
+ print('Starting processing...')
19
+
20
+ convert_all(config.images, '.jpg')
21
+ convert_all(config.masks, '.png')
22
+
23
+ print('\n\nCompleted...')
scripts/Dataset/mask_diagnosis.py ADDED
@@ -0,0 +1,8 @@
1
+ import numpy as np
2
+ from PIL import Image
3
+
4
+ path = '/home/pi/Deposito/Projetos/Meus/CertificacaoHuggingFace/fontes/'
5
+ path = path + 'DataSet/masks/0cdf5b5d0ce1_01_mask.png'
6
+
7
+ mask = Image.open(path).convert("L")
8
+ print(np.unique(np.array(mask)))
scripts/Dataset/masks.py ADDED
@@ -0,0 +1,47 @@
1
+ import os
2
+ import cv2
3
+ import json
4
+ import numpy as np
5
+ from PIL import Image
6
+ import scripts.config as config
7
+
8
+ print('Starting processing...')
9
+ os.makedirs(config.masks, exist_ok=True)
10
+ for nameFile in os.listdir(config.images):
11
+ if nameFile.endswith('.json'):
12
+ jsonPath = os.path.join(config.images, nameFile)
13
+
14
+ dados = ''
15
+ with open(jsonPath, 'r', encoding='utf-8') as f:
16
+ dados = json.load(f)
17
+
18
+ baseName = os.path.splitext(nameFile)[0]
19
+ imagePath = os.path.join(config.images, baseName + '.jpg')
20
+
21
+ image = cv2.imread(imagePath)
22
+ if image is None:
23
+ print(f"Image {imagePath} not found or invalid.")
24
+ continue
25
+
26
+ height, width = image.shape[:2]
27
+ imgShape = (height, width)
28
+
29
+ contador = 0
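+ # rasterize every polygon annotation labelled 'gado' into its own binary mask image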
30
+ for shape in dados.get('shapes', []):
31
+ if shape.get('label') == 'gado' and shape.get('shape_type') == 'polygon':
32
+ mask = np.zeros(imgShape, dtype=np.uint8)
33
+ pts = np.array(shape['points'], dtype=np.int32)
34
+ cv2.fillPoly(mask, [pts], color=1)
35
+
36
+ imgMask = (mask * 255).astype(np.uint8)
37
+ imgMask = Image.fromarray(imgMask)
38
+
39
+ maskName = f"{baseName}_{contador}.png" if contador > 0 else f"{baseName}.png" # suffix later polygons so they do not overwrite the first
40
+ endPath = os.path.join(config.masks, maskName)
41
+
42
+ imgMask.save(endPath)
43
+ print(f"Saved: {endPath}")
44
+
45
+ contador += 1
46
+
47
+ print('\n\nCompleted...')
scripts/Dataset/validMasks.py ADDED
@@ -0,0 +1,45 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ import scripts.config as config
6
+ import matplotlib.pyplot as plt
7
+
8
+ print('Starting processing...')
9
+ paths = [os.path.join(config.masks, nome) for nome in os.listdir(config.masks)]
10
+ files = [arq for arq in paths if os.path.isfile(arq)]
11
+ masks = [arq for arq in files if arq.lower().endswith('.png')]
12
+ for maskName in masks:
13
+
14
+ imgName = str(maskName)
15
+ imgName = imgName.replace(config.masks, '')
16
+ # dataset masks follow the '<image>_mask.png' convention, so strip the suffix to recover the image name
17
+ imgName = imgName.replace('_mask.png', '.jpg')
18
+ if imgName.endswith('.png'):
19
+ imgName = imgName.replace('.png', '.jpg')
20
+
21
+ print('imgName = ', str(imgName).replace(config.images, ''))
22
+ print('maskName = ', str(maskName).replace(config.masks, ''))
23
+
24
+ imgPath = os.path.join(config.images, imgName)
25
+ img = Image.open(imgPath).convert("L").resize((config.height, config.width))
26
+ img_np = np.array(img)
27
+
28
+ maskPath = os.path.join(config.masks, maskName)
29
+ mask = Image.open(maskPath).resize((config.height, config.width)).convert("L")
30
+ mask_np = np.array(mask) // 255
31
+
32
+ overlay = cv2.cvtColor(img_np, cv2.COLOR_GRAY2BGR)
33
+ overlay[mask_np == 1] = [255, 0, 0]
34
+
35
+ output = cv2.addWeighted(cv2.cvtColor(img_np, cv2.COLOR_GRAY2BGR), 0.7, overlay, 0.3, 0)
36
+
37
+ plt.figure(figsize=(6, 6))
38
+ plt.imshow(output)
39
+ plt.title(f"Overlay")
40
+ plt.axis("off")
41
+ plt.show()
42
+
43
+ input("Press ENTER to view the next...")
44
+
45
+ print('\n\nCompleted...')
scripts/Segmentation/Future/__init__.py ADDED
File without changes
scripts/Segmentation/Future/cyber_train.py ADDED
@@ -0,0 +1,299 @@
1
+ import os
2
+ import torch
3
+ import datetime
4
+ import numpy as np
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import matplotlib.pyplot as plt
8
+ import scripts.config as config
9
+ from torch.utils.data import DataLoader
10
+ from sklearn.metrics import jaccard_score
11
+ from torch.utils.data import random_split
12
+ import torchvision.transforms as transforms
13
+ import scripts.Segmentation.augment as augment
14
+ from scripts.Segmentation.models import ResNetUNet
15
+ from warmup_scheduler import GradualWarmupScheduler
16
+ from scripts.Segmentation.focalLoss import FocalLoss
17
+ from scripts.Segmentation.segDS import SegmentationDataset
18
+ from scripts.Segmentation.diceLossCriterion import DiceLoss
19
+
20
+ def save_report(row):
21
+ print(str(row))
22
+ with open(config.report_file, 'a', encoding='utf-8') as f:
23
+ f.write(str(row) + '\n')
24
+
25
+ def compute_class_weights(dataset):
26
+ class_counts = torch.zeros(2)
27
+ for _, mask in dataset:
28
+ pixels = mask.view(-1)
29
+ for c in [0, 1]:
30
+ class_counts[c] += (pixels == c).sum()
31
+ weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
32
+ weights = weights / weights.sum()
33
+ return weights
34
+
35
+ def auto_detect_hyperparams(model, train_loader, val_loader):
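+ # lightweight search: each candidate config is trained on a few batches and scored on a few validation batches (val accuracy minus val loss); the best-scoring config is used for the full run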
36
+ candidate_configs = [
37
+ {"lr": 1e-3, "wd": 1e-5, "use_focal": False, "dice_weight": 1.0},
38
+ {"lr": 1e-4, "wd": 1e-4, "use_focal": True, "dice_weight": 1.5},
39
+ {"lr": 5e-4, "wd": 1e-5, "use_focal": False, "dice_weight": 2.0},
40
+ {"lr": 1e-4, "wd": 1e-6, "use_focal": True, "dice_weight": 1.0},
41
+ ]
42
+
43
+ best_score = -float('inf')
44
+ best_config = candidate_configs[0]
45
+
46
+ for cfg in candidate_configs:
47
+ temp_model = ResNetUNet(num_classes=2).to(config.device)
48
+ optimizer = optim.AdamW(temp_model.parameters(), lr=cfg["lr"], weight_decay=cfg["wd"])
49
+ criterion = FocalLoss(gamma=2.0) if cfg["use_focal"] else nn.CrossEntropyLoss()
50
+ dice_loss = DiceLoss()
51
+ temp_model.train()
52
+ for i, (images, masks) in enumerate(train_loader):
53
+ if i > 2: break
54
+ images = images.to(config.device).float()
55
+ masks = masks.to(config.device).long()
56
+ outputs = temp_model(images)
57
+ loss = cfg["dice_weight"] * dice_loss(outputs, masks) + 0.5 * criterion(outputs, masks)
58
+ optimizer.zero_grad()
59
+ loss.backward()
60
+ optimizer.step()
61
+
62
+ temp_model.eval()
63
+ val_acc = 0
64
+ val_loss = 0
65
+ val_batches = 0
66
+ #with torch.no_grad():
67
+ for images, masks in val_loader:
68
+ images = images.to(config.device).float()
69
+ masks = masks.to(config.device).long()
70
+ outputs = temp_model(images)
71
+ preds = torch.argmax(outputs, dim=1)
72
+ acc = (preds == masks).sum().item() / torch.numel(masks)
73
+ val_acc += acc
74
+ loss = cfg["dice_weight"] * dice_loss(outputs, masks) + 0.5 * criterion(outputs, masks)
75
+ val_loss += loss.item()
76
+ val_batches += 1
77
+ if val_batches > 2: break
78
+
79
+ avg_score = val_acc / val_batches - val_loss / val_batches
80
+ if avg_score > best_score:
81
+ best_score = avg_score
82
+ best_config = cfg
83
+
84
+ return best_config
85
+
86
+ os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
87
+ try:
88
+ save_report(torch.__version__)
89
+ save_report(torch.version.cuda)
90
+ save_report(torch.cuda.get_arch_list()) #['sm_75', 'sm_86']
91
+ except Exception as e:
92
+ save_report(e)
93
+ pass
94
+
95
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
96
+ print('Starting processing...')
97
+
98
+ os.makedirs(config.checkpoints, exist_ok=True)
99
+
100
+ transform = transforms.Compose([
101
+ transforms.Resize((config.height, config.width)),
102
+ transforms.ToTensor(),
103
+ transforms.Normalize(mean=[0.5], std=[0.5])
104
+ ])
105
+
106
+ dataset = SegmentationDataset(transform = transform)
107
+
108
+ train_size = int(0.8 * len(dataset))
109
+ val_size = len(dataset) - train_size
110
+ train_ds, val_ds = random_split(dataset, [train_size, val_size])
111
+
112
+ train_loader = DataLoader(train_ds, batch_size=4, shuffle=True)
113
+ val_loader = DataLoader(val_ds, batch_size=4, shuffle=False)
114
+
115
+ model = ResNetUNet(num_classes=2).to(config.device)
116
+
117
+ class_weights = compute_class_weights(dataset).to(config.device)
118
+
119
+
120
+ selected = auto_detect_hyperparams(model, train_loader, val_loader)
121
+ save_report(f"\n🔍 Auto-selected hyperparams: {selected}")
122
+
123
+ criterion = FocalLoss(gamma=2.0) if selected["use_focal"] else nn.CrossEntropyLoss(weight=class_weights)
124
+ optimizer = optim.AdamW(model.parameters(), lr=selected["lr"], weight_decay=selected["wd"])
125
+ dice_loss = DiceLoss()
126
+
127
+
128
+ cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.num_epochs)
129
+ scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0, total_epoch=5, after_scheduler=cosine_scheduler)
130
+
131
+ dataHoraInicial = datetime.datetime.now()
132
+ save_report('\n\n\nStarting training on: ' + str(dataHoraInicial))
133
+
134
+ accuracies = []
135
+ iou_history = []
136
+ loss_history = []
137
+ dice_history = []
138
+ val_accuracies = []
139
+ best_accuracy = 0.0
140
+ dice_loss = DiceLoss()
141
+ epochs_no_improve = 0
142
+
143
+ model.train()
144
+ for epoch in range(config.num_epochs):
145
+ model.train()  # re-enable training mode (validation below switches the model to eval)
+ total_loss = 0
146
+ correct_pixels = 0
147
+ total_pixels = 0
148
+
149
+ for images, masks in train_loader:
150
+ images = images.to(config.device).float()
151
+ masks = masks.to(config.device).long()
152
+
153
+ output = []
154
+
155
+ if config.USE_TTA:
156
+ for img in images:
157
+ preds = augment.predict_with_tta(model, img.unsqueeze(0)) # [1, C, H, W]
158
+ output.append(preds)
159
+ output = torch.cat(output, dim=0) # [B, C, H, W]
160
+ else:
161
+ output = model(images)
162
+
163
+ output = output.float()
164
+ masks = masks.long()
165
+
166
+ loss = selected["dice_weight"] * dice_loss(output, masks) + 0.5 * criterion(output, masks)
167
+
168
+ optimizer.zero_grad()
169
+ loss.backward()
170
+ optimizer.step()
171
+ scheduler.step()
172
+
173
+ preds = torch.argmax(output, dim=1)
174
+
175
+ if config.USE_REFINEMENT:
176
+ preds = augment.refine_mask(preds)
177
+
178
+ correct_pixels += (preds == masks).sum().item()
179
+ total_pixels += torch.numel(preds)
180
+ total_loss += loss.item()
181
+
182
+ loss_history.append(total_loss)
183
+ epoch_accuracy = correct_pixels / total_pixels
184
+ accuracies.append(epoch_accuracy)
185
+
186
+ model.eval()
187
+ val_correct = 0
188
+ val_total = 0
189
+ val_preds_all = []
190
+ val_targets_all = []
191
+ for val_images, val_masks in val_loader:
192
+ val_images = val_images.to(config.device).float()
193
+ val_masks = val_masks.to(config.device).long()
194
+
195
+ val_outputs = model(val_images)
196
+ val_preds = torch.argmax(val_outputs, dim=1)
197
+
198
+ val_preds_all.append(val_preds.view(-1).cpu().numpy())
199
+ val_targets_all.append(val_masks.view(-1).cpu().numpy())
200
+
201
+ val_correct += (val_preds == val_masks).sum().item()
202
+ val_total += torch.numel(val_preds)
203
+
204
+ val_accuracy = val_correct / val_total
205
+ val_accuracies.append(val_accuracy)
206
+
207
+ val_preds_flat = np.concatenate(val_preds_all)
208
+ val_targets_flat = np.concatenate(val_targets_all)
209
+
210
+ #iou = jaccard_score(val_targets_flat, val_preds_flat, average='binary')
211
+ iou = jaccard_score(val_targets_flat, val_preds_flat, average='macro') # or 'weighted'
212
+
213
+ intersection = np.logical_and(val_preds_flat, val_targets_flat).sum()
214
+ union = np.logical_or(val_preds_flat, val_targets_flat).sum()
215
+ dice = (2 * intersection) / (val_preds_flat.sum() + val_targets_flat.sum() + 1e-6)
216
+ iou_history.append(iou)
217
+ dice_history.append(dice)
218
+
219
+ save_report(f"Epoch {epoch+1}/{config.num_epochs}, Loss: {total_loss:.4f}, "
220
+ f"Train Acc: {epoch_accuracy:.4f}, Val Acc: {val_accuracy:.4f}, "
221
+ f"IoU: {iou:.4f}, Dice: {dice:.4f}")
222
+
223
+
224
+ if (epoch + 1) % config.checkpoint_interval == 0:
225
+ checkpoint_path = os.path.join(config.checkpoints, f"checkpoint_epoch_{epoch+1}.pt")
226
+ torch.save({
227
+ 'epoch': epoch + 1,
228
+ 'model_state_dict': model.state_dict(),
229
+ 'optimizer_state_dict': optimizer.state_dict(),
230
+ 'loss': total_loss,
231
+ 'accuracy': epoch_accuracy,
232
+ }, checkpoint_path)
233
+
234
+ if epoch_accuracy > best_accuracy:
235
+ save_report(f"🔸 New best model at epoch {epoch+1} (acc: {epoch_accuracy:.4f}) — saving best_model.pt")
236
+ best_accuracy = epoch_accuracy
237
+ torch.save(model.state_dict(), os.path.join(config.checkpoints, "best_model.pt"))
238
+ epochs_no_improve = 0
239
+ else:
240
+ epochs_no_improve += 1
241
+
242
+ if epochs_no_improve >= config.early_stop_patience:
243
+ save_report(f"\n⛔ Early stopping triggered at epoch {epoch+1}")
244
+ break
245
+
246
+ dataHoraFinal = datetime.datetime.now()
247
+ save_report('Completing training on: ' + str(dataHoraFinal))
248
+ save_report('Total training execution time = ' + str((dataHoraFinal - dataHoraInicial)))
249
+
250
+ model.eval()
251
+
252
+ torch.save(model, config.modelName)
253
+
254
+ try:
255
+ plt.figure(figsize=(8, 5))
256
+ plt.plot(range(1, len(loss_history)+1), loss_history, marker='o')
257
+ plt.title("Loss Evolution")
258
+ plt.xlabel("Epochs")
259
+ plt.ylabel("Loss")
260
+ plt.grid()
261
+ plt.tight_layout()
262
+ plt.savefig(config.source + 'training_loss.png')
263
+
264
+ plt.figure(figsize=(8, 5))
265
+ plt.plot(range(1, len(val_accuracies)+1), val_accuracies, marker='o', color='green')
266
+ plt.title("Validation Accuracy")
267
+ plt.xlabel("Epochs")
268
+ plt.ylabel("Pixel Accuracy")
269
+ plt.grid()
270
+ plt.tight_layout()
271
+ plt.savefig(config.source + 'training_val_accuracy.png')
272
+
273
+ plt.figure(figsize=(8, 5))
274
+ plt.plot(range(1, len(iou_history)+1), iou_history, marker='o', color='purple')
275
+ plt.title("IoU Evolution")
276
+ plt.xlabel("Epochs")
277
+ plt.ylabel("IoU Score")
278
+ plt.grid()
279
+ plt.tight_layout()
280
+ plt.savefig(config.source + 'iou_history.png')
281
+
282
+ plt.figure(figsize=(8, 5))
283
+ plt.plot(range(1, len(dice_history)+1), dice_history, marker='o', color='orange')
284
+ plt.title("Dice Score Evolution")
285
+ plt.xlabel("Epochs")
286
+ plt.ylabel("Dice Score")
287
+ plt.grid()
288
+ plt.tight_layout()
289
+ plt.savefig(config.source + 'dice_history.png')
290
+
291
+ except Exception as e:
292
+ pass
293
+
294
+ save_report("\nTraining Summary:")
295
+ save_report(f" Min Loss: {min(loss_history):.4f}")
296
+ save_report(f" Max Loss: {max(loss_history):.4f}")
297
+ save_report(f" Loss final: {loss_history[-1]:.4f}")
298
+ save_report(f" Best Val Acc: {max(val_accuracies):.4f}")
299
+ print("\nCompleted ✅")
scripts/Segmentation/Future/train_embedded_explicit_model.py ADDED
@@ -0,0 +1,126 @@
1
+ # NOTE: This script is a planned extension for embedded deployment.
2
+ # It has not been tested or benchmarked in this certification phase.
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import scripts.config as config
8
+ from torchvision import transforms
9
+ from torch.utils.data import DataLoader
10
+ from scripts.Segmentation.segDS import SegmentationDataset
11
+
12
+ # ----------------------
13
+ # Compact U-Net-like model defined explicitly
14
+ # ----------------------
15
+ class EmbeddedUNet(nn.Module):
16
+ def __init__(self):
17
+ super().__init__()
18
+
19
+ # Encoder
20
+ self.enc1 = nn.Sequential(
21
+ nn.Conv2d(1, 8, 3, padding=1),
22
+ nn.ReLU(),
23
+ nn.MaxPool2d(2) # 128 -> 64
24
+ )
25
+ self.enc2 = nn.Sequential(
26
+ nn.Conv2d(8, 16, 3, padding=1),
27
+ nn.ReLU(),
28
+ nn.MaxPool2d(2) # 64 -> 32
29
+ )
30
+
31
+ # Bottleneck
32
+ self.bottleneck = nn.Sequential(
33
+ nn.Conv2d(16, 32, 3, padding=1),
34
+ nn.ReLU()
35
+ )
36
+
37
+ # Decoder
38
+ self.up1 = nn.Upsample(scale_factor=2, mode='nearest')
39
+ self.dec1 = nn.Sequential(
40
+ nn.Conv2d(32, 16, 3, padding=1),
41
+ nn.ReLU()
42
+ )
43
+ self.up2 = nn.Upsample(scale_factor=2, mode='nearest')
44
+ self.dec2 = nn.Sequential(
45
+ nn.Conv2d(16, 8, 3, padding=1),
46
+ nn.ReLU()
47
+ )
48
+
49
+ # Output
50
+ self.out = nn.Conv2d(8, 2, kernel_size=1) # 2 classes: background + object
51
+
52
+ def forward(self, x):
53
+ x1 = self.enc1(x)
54
+ x2 = self.enc2(x1)
55
+ x = self.bottleneck(x2)
56
+ x = self.up1(x)
57
+ x = self.dec1(x)
58
+ x = self.up2(x)
59
+ x = self.dec2(x)
60
+ return self.out(x)
61
+
62
+ # ----------------------
63
+ # Configuration
64
+ # ----------------------
65
+ IMG_SIZE = (128, 128)
66
+ BATCH_SIZE = 16
67
+ EPOCHS = 20
68
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
69
+
70
+ # ----------------------
71
+ # Data Loading
72
+ # ----------------------
73
+ transform = transforms.Compose([
74
+ transforms.Grayscale(),
75
+ transforms.Resize(IMG_SIZE),
76
+ transforms.ToTensor()
77
+ ])
78
+
79
+ # SegmentationDataset reads its image/mask directories from scripts.config,
+ # so only the transform is passed here
+ train_ds = SegmentationDataset(transform=transform)
82
+ train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
83
+
84
+ # ----------------------
85
+ # Training
86
+ # ----------------------
87
+ model = EmbeddedUNet().to(DEVICE)
88
+ criterion = nn.CrossEntropyLoss()
89
+ optimizer = optim.Adam(model.parameters(), lr=1e-3)
90
+
91
+ for epoch in range(EPOCHS):
92
+ model.train()
93
+ total_loss = 0.0
94
+ for imgs, masks in train_dl:
95
+ imgs, masks = imgs.to(DEVICE), masks.long().to(DEVICE)
96
+ optimizer.zero_grad()
97
+ outputs = model(imgs)
98
+ loss = criterion(outputs, masks)
99
+ loss.backward()
100
+ optimizer.step()
101
+ total_loss += loss.item()
102
+
103
+ print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {total_loss:.4f}")
104
+
105
+ # ----------------------
106
+ # Save weights only
107
+ # ----------------------
108
+ torch.save(model.state_dict(), 'embedded_model_weights.pth')
109
+
110
+ # ----------------------
111
+ # Export to ONNX (float16)
112
+ # ----------------------
113
+ model.eval()
114
+ dummy_input = torch.randn(1, 1, *IMG_SIZE).to(DEVICE)
115
+ torch.onnx.export(
116
+ model.half(), # Convert model to float16
117
+ dummy_input.half(), # Dummy input in float16
118
+ "embedded_model_fp16.onnx",
119
+ input_names=["input"],
120
+ output_names=["output"],
121
+ opset_version=12,
122
+ do_constant_folding=True,
123
+ dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}}
124
+ )
125
+
126
+ print("✅ Model exported as embedded_model_fp16.onnx")
scripts/Segmentation/__init__.py ADDED
File without changes
scripts/Segmentation/app.py ADDED
@@ -0,0 +1,59 @@
1
+ import os
2
+ import sys
3
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
4
+ import torch
5
+ import numpy as np
6
+ import gradio as gr
7
+ from PIL import Image
8
+ import scripts.config as config
9
+ import torchvision.transforms as transforms
10
+ import scripts.Segmentation.augment as augment
11
+ from scripts.Segmentation.models import ResNetUNet
12
+
13
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
14
+
15
+ modelo = '/best_model.pt'
16
+ model = ResNetUNet(num_classes=2).to(config.device)
17
+ model.load_state_dict(torch.load(config.checkpoints + modelo, map_location=config.device))
18
+ model.eval()
19
+
20
+ transform = transforms.Compose([
21
+ transforms.Grayscale(num_output_channels=1),
22
+ transforms.Resize((config.height, config.width)),
23
+ transforms.ToTensor(),
24
+ transforms.Normalize(mean=[0.5], std=[0.5])
25
+ ])
26
+
27
+ def segment_image(input_img):
28
+ input_img = input_img.convert('L')
29
+ img_tensor = transform(input_img).unsqueeze(0).to(config.device) # (1, 1, H, W)
30
+
31
+ output = None
32
+ if config.USE_TTA:
33
+ output = augment.predict_with_tta(model, img_tensor)
34
+ else:
35
+ output = model(img_tensor)
36
+
37
+ probs = torch.softmax(output, dim=1) # (1, 2, H, W)
38
+ mask = torch.argmax(probs, dim=1).squeeze(0)
39
+
40
+ if config.USE_REFINEMENT:
41
+ mask = augment.refine_mask(mask)
42
+
43
+ mask = mask.cpu().numpy()
44
+ mask_img = Image.fromarray((mask * 255).astype(np.uint8))
45
+ return input_img, mask_img
46
+
47
+ demo = gr.Interface(
48
+ fn=segment_image,
49
+ inputs=gr.Image(type="pil", label="Input image"),
50
+ outputs=[
51
+ gr.Image(type="pil", label="Original image"),
52
+ gr.Image(type="pil", label="Segmented mask"),
53
+ ],
54
+ title="ResNet-UNet Image Segmenter",
55
+ description="Upload an image and see the segmentation mask produced by the trained model."
56
+ )
57
+
58
+ if __name__ == "__main__":
59
+ demo.launch(share=True)
scripts/Segmentation/augment.py ADDED
@@ -0,0 +1,30 @@
1
+ import cv2
2
+ import torch
3
+ import numpy as np
4
+
5
+ def predict_with_tta(model, image):
6
+ # each test-time transform is paired with its inverse so predictions
+ # are mapped back to the original orientation before averaging
+ transforms = [
7
+ (lambda x: x, lambda x: x),
8
+ (lambda x: torch.flip(x, dims=[3]), lambda x: torch.flip(x, dims=[3])),
9
+ (lambda x: torch.rot90(x, 1, [2, 3]), lambda x: torch.rot90(x, -1, [2, 3]))
10
+ ]
11
+ predictions = []
12
+ for tf, inv in transforms:
13
+ aug = tf(image)
14
+ #with torch.no_grad():
15
+ pred = model(aug)
16
+ inv_pred = inv(pred)
17
+ #predictions.append(torch.softmax(inv_pred, dim=1))
18
+ predictions.append(inv_pred)
19
+
20
+ #avg_pred = torch.stack(predictions).mean(0)
21
+ #return torch.argmax(avg_pred, dim=1).squeeze(0)
22
+ avg_logits = torch.stack(predictions).mean(0) # [B, C, H, W]
23
+ return avg_logits
24
+
25
+ def refine_mask(mask_tensor):
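+ # morphological closing then opening with a 5x5 elliptical kernel: fills small holes and removes isolated specks in the predicted mask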
26
+ mask = mask_tensor.cpu().numpy().astype(np.uint8)
27
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
28
+ closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
29
+ opened = cv2.morphologyEx(closed, cv2.MORPH_OPEN, kernel)
30
+ return torch.from_numpy(opened).to(mask_tensor.device)
scripts/Segmentation/diceLossCriterion.py ADDED
@@ -0,0 +1,22 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ class DiceLoss(nn.Module):
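+ # soft Dice loss on the foreground channel: 1 - (2*intersection + smooth) / (|pred| + |target| + smooth)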
5
+
6
+ def __init__(self, smooth=1e-6):
7
+ super(DiceLoss, self).__init__()
8
+ self.smooth = smooth
9
+
10
+ def forward(self, logits, targets):
11
+ logits = logits.float() # [B, C, H, W]
12
+ probs = torch.softmax(logits, dim=1)
13
+ preds = probs[:, 1, :, :] # [B, H, W]
14
+
15
+ if targets.ndim == 4:
16
+ targets = targets.squeeze(1) # [B, H, W]
17
+ targets = (targets == 1).float() # binarize if necessary
18
+
19
+ intersection = (preds * targets).sum(dim=(1, 2))
20
+ union = preds.sum(dim=(1, 2)) + targets.sum(dim=(1, 2))
21
+ dice = (2 * intersection + self.smooth) / (union + self.smooth)
22
+ return 1 - dice.mean()
scripts/Segmentation/evaluate_model.py ADDED
@@ -0,0 +1,93 @@
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ import scripts.config as config
7
+ import matplotlib.pyplot as plt
8
+ import torchvision.transforms as transforms
9
+ import scripts.Segmentation.augment as augment
10
+ from scripts.Segmentation.models import ResNetUNet
11
+
12
+ def run():
13
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
14
+
15
+ modelo = "/best_model.pt"
16
+ model = ResNetUNet(num_classes=2)
17
+ model.load_state_dict(torch.load(config.checkpoints + modelo, map_location=config.device))
18
+
19
+ #modelo = "/modelo_completo.pth"
20
+ #model = torch.load(config.checkpoints + modelo, map_location=config.device) #full model
21
+
22
+ #modelo = "/checkpoint_epoch_20.pt"
23
+ #checkpoint = torch.load(config.checkpoints + modelo, map_location=config.device)
24
+ #model = ResNetUNet(num_classes=2)
25
+ #model.load_state_dict(checkpoint['model_state_dict'])
26
+
27
+ model.to(config.device)
28
+ model.eval()
29
+
30
+ transform = transforms.Compose([
31
+ transforms.Grayscale(num_output_channels=1),
32
+ transforms.Resize((config.height, config.width)),
33
+ transforms.ToTensor(),
34
+ transforms.Normalize(mean=[0.5], std=[0.5])
35
+ ])
36
+
37
+
38
+ eval_dir = config.extraTests
39
+ image_files = [f for f in os.listdir(eval_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
40
+
41
+ print(f"Found {len(image_files)} images for evaluation.")
42
+
43
+ for file_name in image_files:
44
+ img_path = os.path.join(eval_dir, file_name)
45
+ image = Image.open(img_path).convert("L") # Grayscale
46
+ input_tensor = transform(image).unsqueeze(0).to(config.device) # shape: [1, 1, H, W]
47
+
48
+ #with torch.no_grad():
49
+ if config.USE_TTA:
50
+ output = augment.predict_with_tta(model, input_tensor)
51
+ else:
52
+ output = model(input_tensor)
53
+
54
+ output = torch.softmax(output, dim=1)
55
+ output = torch.argmax(output, dim=1).squeeze(0)
56
+
57
+ if config.USE_REFINEMENT:
58
+ output = augment.refine_mask(output)
59
+
60
+ predicted_mask = output.cpu().numpy() # shape [H, W]
61
+ image_np = np.array(image.resize((config.width, config.height)), dtype=np.float32) / 255.0 # [H, W]
62
+
63
+ mask_overlay = np.zeros((config.height, config.width, 3), dtype=np.float32)
64
+ mask_overlay[..., 0] = predicted_mask # red channel where the mask == 1
65
+
66
+ image_rgb = np.stack([image_np]*3, axis=-1)
67
+
68
+ alpha = 0.4
69
+ blended = (1 - alpha) * image_rgb + alpha * mask_overlay
70
+
71
+ fig, axs = plt.subplots(1, 3, figsize=(15, 4))
72
+ axs[0].imshow(image_np, cmap='gray')
73
+ axs[0].set_title('Evaluated Image')
74
+ axs[0].axis('off')
75
+
76
+ axs[1].imshow(predicted_mask, cmap='jet')
77
+ axs[1].set_title('Predicted Mask')
78
+ axs[1].axis('off')
79
+
80
+ axs[2].imshow(blended)
81
+ axs[2].set_title('Overlay')
82
+ axs[2].axis('off')
83
+
84
+ plt.suptitle(f"Evaluating: {file_name}", fontsize=12)
85
+ plt.tight_layout()
86
+ plt.show()
87
+
88
+ input("Press ENTER to continue...")
89
+
90
+ print('\n\nCompleted...')
91
+
92
+ if __name__ == "__main__":
93
+ run()
scripts/Segmentation/focalLoss.py ADDED
@@ -0,0 +1,15 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ class FocalLoss(nn.Module):
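+ # focal-style weighting: cross-entropy scaled by (1 - p_t)**gamma, with p_t recovered from the mean CE (down-weights easy pixels)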
5
+
6
+ def __init__(self, gamma=2.0, weight=None):
7
+ super(FocalLoss, self).__init__()
8
+ self.gamma = gamma
9
+ self.ce = nn.CrossEntropyLoss(weight=weight)
10
+
11
+ def forward(self, logits, targets):
12
+ logits = logits.float()
13
+ ce_loss = self.ce(logits, targets)
14
+ pt = torch.exp(-ce_loss)
15
+ return ((1 - pt) ** self.gamma * ce_loss).mean()
scripts/Segmentation/models.py ADDED
@@ -0,0 +1,78 @@
 
1
+ import torch.nn as nn
2
+ import torch.nn.functional as F
3
+ import torchvision.models as models
4
+
5
+ class ResNetUNet(nn.Module):
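+ # ResNet-50 encoder (conv1 swapped for single-channel grayscale input) with a U-Net-style decoder; skip connections are summed into the upsampled features at each stage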
6
+
7
+ def __init__(self, num_classes=2):
8
+ super(ResNetUNet, self).__init__()
9
+ resnet = models.resnet50(pretrained=True)
10
+
11
+ resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
12
+ nn.init.kaiming_normal_(resnet.conv1.weight, mode='fan_out', nonlinearity='relu')
13
+
14
+ self.input_block = nn.Sequential(
15
+ resnet.conv1,
16
+ resnet.bn1,
17
+ resnet.relu
18
+ )
19
+ self.maxpool = resnet.maxpool
20
+
21
+ self.encoder1 = resnet.layer1 # 64→256
22
+ self.encoder2 = resnet.layer2 # 256→512
23
+ self.encoder3 = resnet.layer3 # 512→1024
24
+ self.bottleneck = resnet.layer4 # 1024→2048
25
+
26
+ self.up1 = nn.ConvTranspose2d(2048, 1024, kernel_size=2, stride=2)
27
+ self.up2 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
28
+ self.up3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
29
+ self.up4 = nn.ConvTranspose2d(256, 64, kernel_size=2, stride=2)
30
+ self.up5 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
31
+
32
+ self.conv1 = nn.Sequential(
33
+ nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
34
+ nn.BatchNorm2d(1024),
35
+ nn.ReLU()
36
+ )
37
+ self.conv2 = nn.Sequential(
38
+ nn.Conv2d(512, 512, kernel_size=3, padding=1),
39
+ nn.BatchNorm2d(512),
40
+ nn.ReLU()
41
+ )
42
+ self.conv3 = nn.Sequential(
43
+ nn.Conv2d(256, 256, kernel_size=3, padding=1),
44
+ nn.BatchNorm2d(256),
45
+ nn.ReLU()
46
+ )
47
+ self.conv4 = nn.Sequential(
48
+ nn.Conv2d(64, 64, kernel_size=3, padding=1),
49
+ nn.BatchNorm2d(64),
50
+ nn.ReLU()
51
+ )
52
+
53
+ self.out_conv = nn.Conv2d(64, num_classes, kernel_size=1)
54
+
55
+ def forward(self, x):
56
+ x0 = self.input_block(x)
57
+ x1 = self.maxpool(x0)
58
+ x2 = self.encoder1(x1)
59
+ x3 = self.encoder2(x2)
60
+ x4 = self.encoder3(x3)
61
+ x5 = self.bottleneck(x4)
62
+
63
+ d1 = F.relu(self.up1(x5) + x4)
64
+ d1 = self.conv1(d1)
65
+
66
+ d2 = F.relu(self.up2(d1) + x3)
67
+ d2 = self.conv2(d2)
68
+
69
+ d3 = F.relu(self.up3(d2) + x2)
70
+ d3 = self.conv3(d3)
71
+
72
+ d4 = F.relu(self.up4(d3) + x0)
73
+ d4 = self.conv4(d4)
74
+
75
+ d5 = self.up5(d4)
76
+ out = self.out_conv(d5)
77
+ return out
78
+
scripts/Segmentation/segDS.py ADDED
@@ -0,0 +1,42 @@
1
+ import os
2
+ import torch
3
+ import numpy as np
4
+ from PIL import Image
5
+ import scripts.config as config
6
+ from torch.utils.data import Dataset
7
+ import torchvision.transforms as transforms
8
+
9
+ class SegmentationDataset(Dataset):
10
+
11
+ def __init__(self, transform=None):
12
+ self.image_dir = config.images
13
+ self.mask_dir = config.masks
14
+ self.transform = transform
15
+ paths = [os.path.join(self.image_dir, f) for f in os.listdir(self.image_dir) if f.lower().endswith('.jpg')]
16
+ self.image_files = [os.path.basename(f) for f in paths]
17
+
18
+ def __len__(self):
19
+ return len(self.image_files)
20
+
21
+ def __getitem__(self, idx):
22
+ img_name = self.image_files[idx]
23
+ img_path = os.path.join(self.image_dir, img_name)
24
+ mask_path = os.path.join(self.mask_dir, img_name.replace('.jpg', '_mask.png'))
25
+ if not os.path.exists(mask_path):
26
+ raise FileNotFoundError(f"Mask not found for: {img_name}")
27
+
28
+ image = Image.open(img_path).convert("L")
29
+ mask = Image.open(mask_path).convert("L")
30
+
31
+ if self.transform:
32
+ image = self.transform(image)
33
+
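+ # masks are stored as 0/255 grayscale PNGs; threshold them to {0, 1} class indices for CrossEntropy/Dice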
34
+ mask = np.array(mask)
35
+ mask = (mask > 127).astype(np.uint8)
36
+ mask = torch.from_numpy(mask).long()
37
+
38
+ unique_vals = np.unique(mask)
39
+ if not set(unique_vals).issubset({0, 1}):
40
+ raise ValueError(f"Mask contains invalid values: {unique_vals}")
41
+
42
+ return image, mask
scripts/Segmentation/train.py ADDED
@@ -0,0 +1,250 @@
1
+ import os
2
+ import torch
3
+ import datetime
4
+ import numpy as np
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import matplotlib.pyplot as plt
8
+ import scripts.config as config
9
+ from torch.utils.data import DataLoader
10
+ from sklearn.metrics import jaccard_score
11
+ from torch.utils.data import random_split
12
+ import torchvision.transforms as transforms
13
+ import scripts.Segmentation.augment as augment
14
+ from scripts.Segmentation.models import ResNetUNet
15
+ from warmup_scheduler import GradualWarmupScheduler
16
+ from scripts.Segmentation.focalLoss import FocalLoss
17
+ from scripts.Segmentation.segDS import SegmentationDataset
18
+ from scripts.Segmentation.diceLossCriterion import DiceLoss
19
+
20
+ def save_report(row):
21
+ print(str(row))
22
+ with open(config.report_file, 'a', encoding='utf-8') as f:
23
+ f.write(str(row) + '\n')
24
+
25
+ def compute_class_weights(dataset):
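+ # inverse-frequency class weights (background vs. foreground) over the whole dataset, normalized to sum to 1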
26
+ class_counts = torch.zeros(2)
27
+ for _, mask in dataset:
28
+ pixels = mask.view(-1)
29
+ for c in [0, 1]:
30
+ class_counts[c] += (pixels == c).sum()
31
+ weights = class_counts.sum() / (2.0 * class_counts + 1e-6)
32
+ weights = weights / weights.sum()
33
+ return weights
34
+
35
+ os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
36
+ try:
37
+ save_report(torch.__version__)
38
+ save_report(torch.version.cuda)
39
+ save_report(torch.cuda.get_arch_list()) #['sm_75', 'sm_86']
40
+ except Exception as e:
41
+ save_report(e)
42
+ pass
43
+
44
+ config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
45
+ print('Starting processing...')
46
+
47
+ os.makedirs(config.checkpoints, exist_ok=True)
48
+
49
+ transform = transforms.Compose([
50
+ transforms.Resize((config.height, config.width)),
51
+ transforms.ToTensor(),
52
+ transforms.Normalize(mean=[0.5], std=[0.5])
53
+ ])
54
+
55
+ dataset = SegmentationDataset(transform = transform)
56
+
57
+ train_size = int(0.8 * len(dataset))
58
+ val_size = len(dataset) - train_size
59
+ train_ds, val_ds = random_split(dataset, [train_size, val_size])
60
+
61
+ train_loader = DataLoader(train_ds, batch_size=4, shuffle=True)
62
+ val_loader = DataLoader(val_ds, batch_size=4, shuffle=False)
63
+
64
+ model = ResNetUNet(num_classes=2).to(config.device)
65
+
66
+ class_weights = compute_class_weights(dataset).to(config.device)
67
+
68
+ criterion = None
69
+ if config.USE_FOCAL_LOSS:
70
+ criterion = FocalLoss(gamma=2.0)
71
+ else:
72
+ criterion = nn.CrossEntropyLoss(weight = class_weights)
73
+
74
+ #optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
75
+ optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
76
+
77
+ #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
78
+ #scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.num_epochs)
79
+ cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.num_epochs)
80
+ scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0, total_epoch=5, after_scheduler=cosine_scheduler)
81
+
82
+ dataHoraInicial = datetime.datetime.now()
83
+ save_report('\n\n\nStarting training on: ' + str(dataHoraInicial))
84
+
85
+ accuracies = []
86
+ iou_history = []
87
+ loss_history = []
88
+ dice_history = []
89
+ val_accuracies = []
90
+ best_accuracy = 0.0
91
+ dice_loss = DiceLoss()
92
+ epochs_no_improve = 0
93
+
94
+ model.train()
95
+ for epoch in range(config.num_epochs):
96
+ model.train()  # re-enable training mode (validation below switches the model to eval)
+ total_loss = 0
97
+ correct_pixels = 0
98
+ total_pixels = 0
99
+
100
+ for images, masks in train_loader:
101
+ images = images.to(config.device).float()
102
+ masks = masks.to(config.device).long()
103
+
104
+ output = []
105
+
106
+ if config.USE_TTA:
107
+ for img in images:
108
+ preds = augment.predict_with_tta(model, img.unsqueeze(0)) # [1, C, H, W]
109
+ output.append(preds)
110
+ output = torch.cat(output, dim=0) # [B, C, H, W]
111
+ else:
112
+ output = model(images)
113
+
114
+ output = output.float()
115
+ masks = masks.long()
116
+
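+ # composite objective: Dice (weight 1.5) drives region overlap, CrossEntropy/Focal (weight 0.5) stabilizes per-pixel learning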
117
+ loss = 1.5 * dice_loss(output, masks) + 0.5 * criterion(output, masks)
118
+
119
+ optimizer.zero_grad()
120
+ loss.backward()
121
+ optimizer.step()
122
+ scheduler.step()
123
+
124
+ preds = torch.argmax(output, dim=1)
125
+
126
+ if config.USE_REFINEMENT:
127
+ preds = augment.refine_mask(preds)
128
+
129
+ correct_pixels += (preds == masks).sum().item()
130
+ total_pixels += torch.numel(preds)
131
+ total_loss += loss.item()
132
+
133
+ loss_history.append(total_loss)
134
+ epoch_accuracy = correct_pixels / total_pixels
135
+ accuracies.append(epoch_accuracy)
136
+
137
+ model.eval()
138
+ val_correct = 0
139
+ val_total = 0
140
+ val_preds_all = []
141
+ val_targets_all = []
142
+ for val_images, val_masks in val_loader:
143
+ val_images = val_images.to(config.device).float()
144
+ val_masks = val_masks.to(config.device).long()
145
+
146
+ val_outputs = model(val_images)
147
+ val_preds = torch.argmax(val_outputs, dim=1)
148
+
149
+ val_preds_all.append(val_preds.view(-1).cpu().numpy())
150
+ val_targets_all.append(val_masks.view(-1).cpu().numpy())
151
+
152
+ val_correct += (val_preds == val_masks).sum().item()
153
+ val_total += torch.numel(val_preds)
154
+
155
+ val_accuracy = val_correct / val_total
156
+ val_accuracies.append(val_accuracy)
157
+
158
+ val_preds_flat = np.concatenate(val_preds_all)
159
+ val_targets_flat = np.concatenate(val_targets_all)
160
+
161
+ #iou = jaccard_score(val_targets_flat, val_preds_flat, average='binary')
162
+ iou = jaccard_score(val_targets_flat, val_preds_flat, average='macro') # or 'weighted'
163
+
164
+ intersection = np.logical_and(val_preds_flat, val_targets_flat).sum()
165
+ union = np.logical_or(val_preds_flat, val_targets_flat).sum()
166
+ dice = (2 * intersection) / (val_preds_flat.sum() + val_targets_flat.sum() + 1e-6)
167
+ iou_history.append(iou)
168
+ dice_history.append(dice)
169
+
170
+ save_report(f"Epoch {epoch+1}/{config.num_epochs}, Loss: {total_loss:.4f}, "
171
+ f"Train Acc: {epoch_accuracy:.4f}, Val Acc: {val_accuracy:.4f}, "
172
+ f"IoU: {iou:.4f}, Dice: {dice:.4f}")
173
+
174
+
175
+ if (epoch + 1) % config.checkpoint_interval == 0:
176
+ checkpoint_path = os.path.join(config.checkpoints, f"checkpoint_epoch_{epoch+1}.pt")
177
+ torch.save({
178
+ 'epoch': epoch + 1,
179
+ 'model_state_dict': model.state_dict(),
180
+ 'optimizer_state_dict': optimizer.state_dict(),
181
+ 'loss': total_loss,
182
+ 'accuracy': epoch_accuracy,
183
+ }, checkpoint_path)
184
+
185
+ if epoch_accuracy > best_accuracy:
186
+ save_report(f"🔸 New best model at epoch {epoch+1} (acc: {epoch_accuracy:.4f}) — saving best_model.pt")
187
+ best_accuracy = epoch_accuracy
188
+ torch.save(model.state_dict(), os.path.join(config.checkpoints, "best_model.pt"))
189
+ epochs_no_improve = 0
190
+ else:
191
+ epochs_no_improve += 1
192
+
193
+ if epochs_no_improve >= config.early_stop_patience:
194
+ save_report(f"\n⛔ Early stopping triggered at epoch {epoch+1}")
195
+ break
196
+
197
+ dataHoraFinal = datetime.datetime.now()
198
+ save_report('Completing training on: ' + str(dataHoraFinal))
199
+ save_report('Total training execution time = ' + str((dataHoraFinal - dataHoraInicial)))
200
+
201
+ model.eval()
202
+
203
+ torch.save(model, config.modelName)
204
+
205
+ try:
206
+ plt.figure(figsize=(8, 5))
207
+ plt.plot(range(1, len(loss_history)+1), loss_history, marker='o')
208
+ plt.title("Loss Evolution")
209
+ plt.xlabel("Epochs")
210
+ plt.ylabel("Loss")
211
+ plt.grid()
212
+ plt.tight_layout()
213
+ plt.savefig(config.source + 'training_loss.png')
214
+
215
+ plt.figure(figsize=(8, 5))
216
+ plt.plot(range(1, len(val_accuracies)+1), val_accuracies, marker='o', color='green')
217
+ plt.title("Validation Accuracy")
218
+ plt.xlabel("Epochs")
219
+ plt.ylabel("Pixel Accuracy")
220
+ plt.grid()
221
+ plt.tight_layout()
222
+ plt.savefig(config.source + 'training_val_accuracy.png')
223
+
224
+ plt.figure(figsize=(8, 5))
225
+ plt.plot(range(1, len(iou_history)+1), iou_history, marker='o', color='purple')
226
+ plt.title("IoU Evolution")
227
+ plt.xlabel("Epochs")
228
+ plt.ylabel("IoU Score")
229
+ plt.grid()
230
+ plt.tight_layout()
231
+ plt.savefig(config.source + 'iou_history.png')
232
+
233
+ plt.figure(figsize=(8, 5))
234
+ plt.plot(range(1, len(dice_history)+1), dice_history, marker='o', color='orange')
235
+ plt.title("Dice Score Evolution")
236
+ plt.xlabel("Epochs")
237
+ plt.ylabel("Dice Score")
238
+ plt.grid()
239
+ plt.tight_layout()
240
+ plt.savefig(config.source + 'dice_history.png')
241
+
242
+ except Exception as e:
243
+ pass
244
+
245
+ save_report("\nTraining Summary:")
246
+ save_report(f" Min Loss: {min(loss_history):.4f}")
247
+ save_report(f" Max Loss: {max(loss_history):.4f}")
248
+ save_report(f" Loss final: {loss_history[-1]:.4f}")
249
+ save_report(f" Best Val Acc: {max(val_accuracies):.4f}")
250
+ print("\nCompleted ✅")
scripts/__init__.py ADDED
File without changes
scripts/config.py ADDED
@@ -0,0 +1,25 @@
1
+ import os
2
+
3
+ device = ''
4
+ width = 512
5
+ height = 512
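+ # all images and masks are resized to (width, height) before training and inference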
6
+
7
+ num_epochs = 100
8
+ checkpoint_interval = 2
9
+ early_stop_patience = 10
10
+
11
+ USE_TTA = True
12
+ USE_REFINEMENT = True
13
+ USE_FOCAL_LOSS = False
14
+
15
+ source = str(os.path.dirname(os.path.realpath(__file__))).replace('scripts', '')
16
+ images = source + 'DataSet/images/'
17
+ masks = source + 'DataSet/masks/'
18
+ annotations = source + 'DataSet/annotations/'
19
+ extraTests = source + 'DataSet/ExtraTests/'
20
+ tempImages = source + 'DataSet/tempImages/'
21
+ checkpoints = source + 'checkpoints/'
22
+
23
+ modelName = checkpoints + 'modelo_completo.pth'
24
+ report_file = source + 'report_file.txt'
25
+