Attribution-ShareAlike 3.0 (CC BY-SA 3.0) https://creativecommons.org/licenses/by-sa/3.0/
License information was derived automatically
Imports:
# All Imports
import os
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.image as mpimg
import cv2
import numpy as np
import pickle
# TensorFlow and Keras: layers, models, optimizers and losses
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import *
# Optimizer
from keras.optimizers import Adamax
# PreTrained Model
from keras.applications import *
#Early Stopping
from keras.callbacks import EarlyStopping
import warnings
Warnings Suppression | Configuration
# Warnings Remove
warnings.filterwarnings("ignore")
# Define the base path for the training folder
base_path = 'jaguar_cheetah/train'
# Weights file
weights_file = 'Model_train_weights.weights.h5'
# Path to save or load the model:
model_file = 'Model-cheetah_jaguar_Treined.keras'
# Model history
history_path = 'training_history_cheetah_jaguar.pkl'
# Initialize lists to store file paths and labels
filepaths = []
labels = []
# Iterate over folders and files within the training directory
for folder in ['Cheetah', 'Jaguar']:
    folder_path = os.path.join(base_path, folder)
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        filepaths.append(file_path)
        labels.append(folder)
# Create the TRAINING dataframe
file_path_series = pd.Series(filepaths, name='filepath')
Label_path_series = pd.Series(labels, name='label')
df_train = pd.concat([file_path_series, Label_path_series], axis=1)
# Define the base path for the test folder
directory = "jaguar_cheetah/test"
filepath = []
label = []
folds = os.listdir(directory)
for fold in folds:
    f_path = os.path.join(directory, fold)
    imgs = os.listdir(f_path)
    for img in imgs:
        img_path = os.path.join(f_path, img)
        filepath.append(img_path)
        label.append(fold)
# Create the TEST dataframe
file_path_series = pd.Series(filepath, name='filepath')
Label_path_series = pd.Series(label, name='label')
df_test = pd.concat([file_path_series, Label_path_series], axis=1)
# Display the first rows of the dataframe for verification
#print(df_train)
# Folders with Training and Test files
data_dir = 'jaguar_cheetah/train'
test_dir = 'jaguar_cheetah/test'
# Image size 256x256
IMAGE_SIZE = (256,256)
Train | Test
#print('Training Images:')
# Create the TRAIN dataset
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.1,
    subset='training',
    seed=123,
    image_size=IMAGE_SIZE,
    batch_size=32)
# Validation Data
#print('Validation Images:')
validation_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.1,
    subset='validation',
    seed=123,
    image_size=IMAGE_SIZE,
    batch_size=32)
print('Testing Images:')
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    seed=123,
    image_size=IMAGE_SIZE,
    batch_size=32)
# Extract labels
train_labels = train_ds.class_names
test_labels = test_ds.class_names
validation_labels = validation_ds.class_names
# Encode labels
# Defining the class labels
class_labels = ['Cheetah', 'Jaguar']  # must match the class_names returned by the datasets
# Instantiate (encoder) LabelEncoder
label_encoder = LabelEncoder()
# Fit the label encoder on the class labels
label_encoder.fit(class_labels)
# Transform the labels for the training dataset
train_labels_encoded = label_encoder.transform(train_labels)
# Transform the labels for the validation dataset
validation_labels_encoded = label_encoder.transform(validation_labels)
# Transform the labels for the testing dataset
test_labels_encoded = label_encoder.transform(test_labels)
# Normalize the pixel values
# Train files
train_ds = train_ds.map(lambda x, y: (x / 255.0, y))
# Validate files
validation_ds = validation_ds.map(lambda x, y: (x / 255.0, y))
# Test files
test_ds = test_ds.map(lambda x, y: (x / 255.0, y))
#TRAINING VISUALIZATION
#Count the occurrences of each category in the column
count = df_train['label'].value_counts()
# Create a figure with 2 subplots
fig, axs = plt.subplots(1, 2, figsize=(12, 6), facecolor='white')
# Plot a pie chart on the first subplot
palette = sns.color_palette("viridis")
sns.set_palette(palette)
axs[0].pie(count, labels=count.index, autopct='%1.1f%%', startangle=140)
axs[0].set_title('Distribution of Training Categories')
# Plot a bar chart on the second subplot
sns.barplot(x=count.index, y=count.values, ax=axs[1], palette="viridis")
axs[1].set_title('Count of Training Categories')
# Adjust the layout
plt.tight_layout()
# Visualize
plt.show()
# TEST VISUALIZATION
count = df_test['label'].value_counts()
# Create a figure with 2 subplots
fig, axs = plt.subplots(1, 2, figsize=(12, 6), facec...
An audio dataset of spoken words designed to help train and evaluate keyword spotting systems. Its primary goal is to provide a way to build and test small models that detect when a single word is spoken, from a set of ten target words, with as few false positives as possible from background noise or unrelated speech. Note that in the train and validation sets, the label "unknown" is much more prevalent than the labels of the target words or background noise. One difference from the release version is the handling of silent segments. While in the test set the silence segments are regular 1-second files, in the training set they are provided as long segments under the "background_noise" folder. Here we split this background noise into 1-second clips, and also keep one of the files for the validation set.
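The 1-second splitting described above can be reproduced with a short script. A minimal sketch, assuming 16 kHz mono WAV files under a "_background_noise_" folder; the folder layout, output folder, and the soundfile dependency are assumptions for illustration:
import os
import soundfile as sf

SAMPLE_RATE = 16000  # Speech Commands audio is 16 kHz

src_dir = 'speech_commands/_background_noise_'   # assumed input location
dst_dir = 'speech_commands/_background_clips_'   # hypothetical output folder
os.makedirs(dst_dir, exist_ok=True)

for name in os.listdir(src_dir):
    if not name.endswith('.wav'):
        continue
    audio, sr = sf.read(os.path.join(src_dir, name))
    n_clips = len(audio) // SAMPLE_RATE
    for i in range(n_clips):
        clip = audio[i * SAMPLE_RATE:(i + 1) * SAMPLE_RATE]
        sf.write(os.path.join(dst_dir, f'{name[:-4]}_{i:04d}.wav'), clip, SAMPLE_RATE)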
To use this dataset:
import tensorflow_datasets as tfds
ds = tfds.load('speech_commands', split='train')
for ex in ds.take(4):
    print(ex)
See the guide for more information on tensorflow_datasets.
Bacteria identification based on genomic sequences holds the promise of early detection of diseases, but requires a model that can output low confidence predictions on out-of-distribution (OOD) genomic sequences from new bacteria that were not present in the training data.
We introduce a genomics dataset for OOD detection that allows other researchers to benchmark progress on this important problem. New bacterial classes are gradually discovered over the years. Grouping classes by years is a natural way to mimic the in-distribution and OOD examples.
The dataset contains genomic sequences sampled from 10 bacteria classes that were discovered before the year 2011 as in-distribution classes, 60 bacteria classes discovered between 2011-2016 as OOD for validation, and another 60 different bacteria classes discovered after 2016 as OOD for test, 130 bacteria classes in total. Note that training, validation, and test data are provided for the in-distribution classes, while only validation and test data are provided for the OOD classes. By its nature, OOD data is not available at training time.
Each genomic sequence is 250 characters long, composed of characters from {A, C, G, T}. The sample size of each class is 100,000 in the training set and 10,000 in the validation and test sets.
For each example, the features include:
- seq: the input DNA sequence, composed of {A, C, G, T}
- label: the name of the bacteria class
- seq_info: the source of the DNA sequence, i.e., the genome name, NCBI accession number, and the position where it was sampled from
- domain: whether the bacteria is in-distribution (in) or OOD (ood)
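As a quick illustration of working with these features, here is a minimal sketch (not part of the dataset card) that maps the 250-character seq string to integer ids with NumPy; the A/C/G/T to 0..3 mapping is an arbitrary illustrative choice:
import numpy as np
import tensorflow_datasets as tfds

ds = tfds.load('genomics_ood', split='train')
lookup = {ord('A'): 0, ord('C'): 1, ord('G'): 2, ord('T'): 3}
for ex in ds.take(1):
    seq_bytes = ex['seq'].numpy()                                   # e.g. b'ACGT...' of length 250
    ids = np.array([lookup[b] for b in seq_bytes], dtype=np.int32)  # integer-encoded sequence
    print(ex['label'].numpy(), ex['domain'].numpy(), ids[:10])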
The details of the dataset can be found in the paper supplemental.
To use this dataset:
import tensorflow_datasets as tfds
ds = tfds.load('genomics_ood', split='train')
for ex in ds.take(4):
    print(ex)
See the guide for more information on tensorflow_datasets.
Attribution 4.0 (CC BY 4.0) https://creativecommons.org/licenses/by/4.0/
License information was derived automatically
The dataset contains images categorized into sehat and tidak sehat, organized into train, test, and validation folders, each with subfolders for each class (/sehat and /tidak sehat). Images are in JPEG or PNG format with a recommended resolution of 240x240 pixels, suitable for the VGG16 model's input requirements. The dataset is intended for deep learning applications, viewable with standard image viewers, and usable with Python, particularly TensorFlow and Keras. To access and run the VGG16 model, Google Colab or Jupyter Notebook can be used for cloud execution. For processing, an image data generator is set up to normalize the images, while VGG16 (with pre-trained ImageNet weights) serves as the base model with added dense layers for binary classification between sehat and tidak sehat. The model can then be compiled with an optimizer (e.g., Adam) and trained on the data with appropriate evaluation on the validation and test sets.
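A minimal Keras sketch of the pipeline described above; the directory paths, batch size, and added layer widths are assumptions for illustration, not the original implementation:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1.0 / 255)  # normalize pixel values
train_gen = datagen.flow_from_directory('dataset/train', target_size=(240, 240),
                                        batch_size=32, class_mode='binary')
val_gen = datagen.flow_from_directory('dataset/validation', target_size=(240, 240),
                                      batch_size=32, class_mode='binary')

base = VGG16(weights='imagenet', include_top=False, input_shape=(240, 240, 3))
base.trainable = False  # keep the pre-trained ImageNet features frozen

model = models.Sequential([
    base,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),  # binary: sehat vs tidak sehat
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_gen, validation_data=val_gen, epochs=5)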
Moving variant of MNIST database of handwritten digits. This is the
data used by the authors for reporting model performance. See
tfds.video.moving_mnist.image_as_moving_sequence
for generating training/validation data from the MNIST dataset.
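A minimal sketch of that helper, loosely following the TFDS documentation; the sequence length and batch size here are arbitrary choices:
import tensorflow as tf
import tensorflow_datasets as tfds

mnist_ds = tfds.load('mnist', split='train', as_supervised=True)

def to_sequence(image, label):
    # Turn a single 28x28 digit into a short sequence of 64x64 frames.
    image = tf.cast(image, tf.float32) / 255.0
    seq = tfds.video.moving_mnist.image_as_moving_sequence(image, sequence_length=20)
    return seq.image_sequence

moving_ds = mnist_ds.map(to_sequence).batch(8)
for batch in moving_ds.take(1):
    print(batch.shape)  # (8, 20, 64, 64, 1)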
To use this dataset:
import tensorflow_datasets as tfds
ds = tfds.load('moving_mnist', split='train')
for ex in ds.take(4):
    print(ex)
See the guide for more information on tensorflow_datasets.
http://www.apache.org/licenses/LICENSE-2.0
DeepWeeds: A Multiclass Weed Species Image Dataset for Deep Learning
This repository makes available the source code and public dataset for the work, "DeepWeeds: A Multiclass Weed Species Image Dataset for Deep Learning", published with open access by Scientific Reports: https://www.nature.com/articles/s41598-018-38343-3. The DeepWeeds dataset consists of 17,509 images capturing eight different weed species native to Australia in situ with neighbouring flora. In our work, the dataset was classified to an average accuracy of 95.7% with the ResNet50 deep convolutional neural network.
The source code, images and annotations are licensed under CC BY 4.0 license. The contents of this repository are released under an Apache 2 license.
Download the dataset images and our trained models
images.zip (468 MB)
models.zip (477 MB)
Due to the size of the images and models, they are hosted outside of the GitHub repository. The images and models must be downloaded into directories named "images" and "models", respectively, at the root of the repository. If you execute the Python script (deepweeds.py), as instructed below, this step will be performed for you automatically.
TensorFlow Datasets
Alternatively, you can access the DeepWeeds dataset with TensorFlow Datasets, TensorFlow's official collection of ready-to-use datasets. DeepWeeds was officially added to the TensorFlow Datasets catalog in August 2019.
Weeds and locations
The selected weed species are local to pastoral grasslands across the state of Queensland. They include: "Chinee apple", "Snake weed", "Lantana", "Prickly acacia", "Siam weed", "Parthenium", "Rubber vine" and "Parkinsonia". The images were collected from weed infestations at the following sites across Queensland: "Black River", "Charters Towers", "Cluden", "Douglas", "Hervey Range", "Kelso", "McKinlay" and "Paluma". The table and figure below break down the dataset by weed, location and geographical distribution.
Data organization
Images are assigned unique filenames that include the date/time the image was photographed and an ID number for the instrument which produced the image. The format is like so: YYYYMMDD-HHMMSS-ID, where the ID is simply an integer from 0 to 3. The unique filenames are strings of 17 characters, such as 20170320-093423-1.
labels
The labels.csv file assigns species labels to each image. It is a comma separated text file in the format:
Filename,Label,Species
...
20170207-154924-0.jpg,7,Snake weed
20170610-123859-1.jpg,1,Lantana
20180119-105722-1.jpg,8,Negative
...
Note: The specific label subsets of training (60%), validation (20%) and testing (20%) for the five-fold cross validation used in the paper are also provided here as CSV files in the same format as "labels.csv".
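To work with these files, the labels can be joined to full image paths with pandas. A minimal sketch, assuming labels.csv and the images/ directory sit at the repository root as described above:
import os
import pandas as pd

labels = pd.read_csv('labels.csv')           # columns: Filename, Label, Species
labels['Path'] = labels['Filename'].apply(lambda f: os.path.join('images', f))
print(labels.head())
print(labels['Species'].value_counts())      # image count per weed species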
models
We provide the most successful ResNet50 and InceptionV3 models saved in Keras' hdf5 model format. The ResNet50 model, which provided the best results, has also been converted to UFF format in order to construct a TensorRT inference engine.
resnet.hdf5
inception.hdf5
resnet.uff
deepweeds.py
This Python script trains and evaluates Keras' base implementation of ResNet50 and InceptionV3 on the DeepWeeds dataset, pre-trained with ImageNet weights. The performance of the networks is cross-validated over 5 folds. The final classification accuracy is taken to be the average across the five folds. Similarly, the final confusion matrix from the associated paper aggregates across the five independent folds. The script also provides the ability to measure the inference speeds within the TensorFlow environment.
The script can be executed to carry out these computations using the following commands.
To train and evaluate the ResNet50 model with five-fold cross validation, use python3 deepweeds.py cross_validate --model resnet.
To train and evaluate the InceptionV3 model with five-fold cross validation, use python3 deepweeds.py cross_validate --model inception.
To measure inference times for the ResNet50 model, use python3 deepweeds.py inference --model models/resnet.hdf5.
To measure inference times for the InceptionV3 model, use python3 deepweeds.py inference --model models/inception.hdf5.
Dependencies
The required Python packages to execute deepweeds.py are listed in requirements.txt.
tensorrt
This folder includes C++ source code for creating and executing a ResNet50 TensorRT inference engine on an NVIDIA Jetson TX2 platform. To build and run on your Jetson TX2, execute the following commands:
cd tensorrt/src
make -j4
cd ../bin
./resnet_inference
Citations
If you use the DeepWeeds dataset in your work, please cite it as:
IEEE style citation: “A. Olsen, D. A. Konovalov, B. Philippa, P. Ridd, J. C. Wood, J. Johns, W. Banks, B. Girgenti, O. Kenny, J. Whinney, B. Calvert, M. Rahimi Azghadi, and R. D. White, “DeepWeeds: A Multiclass Weed Species Image Dataset for Deep Learning,” Scientific Reports, vol. 9, no. 2058, 2 2019. [Online]. Available: https://doi.org/10.1038/s41598-018-38343-3 ”
BibTeX
@article{DeepWeeds2019, author = {Alex Olsen and Dmitry A. Konovalov and Bronson Philippa and Peter Ridd and Jake C. Wood and Jamie Johns and Wesley Banks and Benjamin Girgenti and Owen Kenny and James Whinney and Brendan Calvert and Mostafa {Rahimi Azghadi} and Ronald D. White}, title = {{DeepWeeds: A Multiclass Weed Species Image Dataset for Deep Learning}}, journal = {Scientific Reports}, year = 2019, number = 2058, month = 2, volume = 9, issue = 1, day = 14, url = "https://doi.org/10.1038/s41598-018-38343-3", doi = "10.1038/s41598-018-38343-3" }
https://creativecommons.org/publicdomain/zero/1.0/
BTFER 7
This is a dataset intended for applying cross-validation to CNN models.
It has been tested with all the available models in the TensorFlow Keras library, as well as other models such as RepVGG, AlexNet, LeNet5, GoogLeNet, VGGFace, and ResNet18, among others.
The dataset was collected using the Google Images API in Python. Once a significant number of images had been collected, the dataset was cleaned manually to delete unwanted images and file formats; after that, the dataset was passed through MTCNN to detect and extract the faces.
After creating the face dataset, we randomly extracted 300 images for each class to avoid personal biases.
The dataset contains images representing different genders, ethnicities, and ages.
On February 8, 2021, Deception Island Chinstrap penguin colonies were photographed during the PiMetAn Project XXXIV Spanish Antarctic campaign using unmanned aerial vehicles (UAV) at a height of 30 m. From the obtained imagery, a training dataset for penguin detection from an aerial perspective was generated. The penguin species is the Chinstrap penguin (Pygoscelis antarcticus). The dataset consists of three folders: "train", containing 531 images, intended for model training; "valid", containing 50 images, intended for model validation; and "test", containing 25 images, intended for model testing. In each of the three folders, an additional .csv file is located, containing labels (x, y positions and class names for every penguin in the images), annotated in Tensorflow Object Detection format. There is only one annotation class: Penguin. All 606 images are 224x224 px in size, at 96 dpi. The following augmentation was applied to create 3 versions of each source image: random shear between -18° and +18° horizontally and between -11° and +11° vertically. This dataset was annotated and exported via www.roboflow.com. The model Faster R-CNN with ResNet-101 backbone was used to perform object detection tasks. Training and evaluation tasks were performed using the TensorFlow 2.0 machine learning platform by Google.
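A quick way to inspect the per-split annotation files is to read whichever .csv sits inside each folder. A minimal sketch; the exact file name and column layout are not specified above, so nothing beyond "one CSV per folder" is assumed:
import glob
import pandas as pd

for split in ('train', 'valid', 'test'):
    csv_files = glob.glob(f'{split}/*.csv')    # one annotation file per folder
    if csv_files:
        ann = pd.read_csv(csv_files[0])
        print(split, ann.shape)
        print(ann.columns.tolist())            # position and class-name columns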
COCO is a large-scale object detection, segmentation, and captioning dataset.
Note:
* Some images from the train and validation sets don't have annotations.
* COCO 2014 and 2017 use the same images, but different train/val/test splits.
* The test split doesn't have any annotations (only images).
* COCO defines 91 classes but the data only uses 80 classes.
* Panoptic annotations define 200 classes but only use 133.
To use this dataset:
import tensorflow_datasets as tfds
ds = tfds.load('coco', split='train')
for ex in ds.take(4):
    print(ex)
See the guide for more information on tensorflow_datasets.
Visualization: https://storage.googleapis.com/tfds-data/visualization/fig/coco-2014-1.1.0.png
Attribution 4.0 (CC BY 4.0) https://creativecommons.org/licenses/by/4.0/
License information was derived automatically
This repository contains the Python scripts built for training and evaluation of the implementation, together with the test data and the resulting road segmentation models corresponding to the paper "Insights into the Effects of Image Overlap and Image Size on Semantic Segmentation Models Trained for Road Surface Area Extraction from Aerial Orthophotography". The scripts make use of the Tensorflow with Keras framework and their additional required dependencies.
The training and validation set is based on the binary SROADEX dataset (https://zenodo.org/records/6482346) that was re-split into tiles that feature the image resolutions (256 x 256, 512 x 512, and 1024 x 1024 pixels) and image overlaps (0% and 12.5%) considered in this study. The data have been generated using scripts developed in Python using Open Source libraries (GDAL/OGR and MapScript) for rasterization of vector cartography that represents the axes of the different types of roads (urban, interurban and rural). This binary road data contains information from 16 full orthoimages (28.5 km * 18.5 km) with spatial resolution of 0.5 m/pixel from the insular and peninsular Spanish territory. Due to the size on disk of approximately 492 gigabytes, this training and validation data is only available upon request from the corresponding author. The test set has been generated from a novel area from Palencia (Spain) and features 18 million pixels labelled with the positive "Road" class. The test sets are provided in the repository for each resolution (with no overlap), so that additional DL models can be evaluated on the same data and compared with the results achieved in this study.
The structure of the information shared in this repository is as follows:
The scripts have been grouped by tile resolution (256, 512 and 1024). First, the test set and the evaluation script can be found. For each tile resolution, there are two subfolders (corresponding to the "no overlap" and "12.5% overlap"). In each case, the Python scripts for training the models in the three repetitions are shared, and the trained models (H5 format) are shared in compressed form. Finally, for each resolution we also share the testing dataset which consists of two folders.
The material is distributed under a CC-BY 4.0 license.
https://creativecommons.org/publicdomain/zero/1.0/
Construct two types of models: (A) a deep learning classifier, such as an LSTM or similar model, to predict the category of a news article given its title and abstract, and (B) a recommendation system to recommend posts that a user is most likely to click.
The dataset consists of two files -- (1) user_news_clicks.csv, and (2) news_text.csv.
Model A, the deep learning classifier, only requires the news_text.csv dataset. The goal is to predict the 'category' label using the 'title' and 'abstract' columns. Model B, the recommendation system, only requires user_news_clicks.csv, but you can additionally use news_text.csv if you'd like, though it is not necessary for this exercise. The goal is to recommend news articles that users are likely to click.
In news_text.csv, each record consists of three attributes and a target variable:
- Category - there are many news categories available in this dataset; as requested, we only need 3 categories: news, sports, and finance
- news_id - identification number of the news article
- title - title of the news article
- abstract - abstract of the news article
In user_news_clicks.csv, each record consists of two attributes and a target variable:
- click - whether the user has clicked the article or not
- user_id - identification number of the user
- item - identification number of an item
NOTE: We do not need to use the entire dataset if resources are limited; feel free to sample.
- For Model A, use only the top 3 categories, namely news, sports, and finance, for model training and validation.
- Code and build models A and B using a Python library such as PyTorch or TensorFlow (a minimal sketch for Model A follows).
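A minimal TensorFlow/Keras sketch of Model A's text-classification pipeline. The column names follow the description above but their exact capitalization may differ; the vocabulary size, sequence length, layer widths, and epoch count are assumptions for illustration:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

# Column names assumed lowercase ('category', 'title', 'abstract').
news = pd.read_csv('news_text.csv')
news = news[news['category'].isin(['news', 'sports', 'finance'])]   # top 3 categories
texts = (news['title'].fillna('') + ' ' + news['abstract'].fillna('')).tolist()
labels = news['category'].astype('category').cat.codes.values       # 0, 1, 2

ds = tf.data.Dataset.from_tensor_slices((texts, labels)).shuffle(10000).batch(32)

vectorizer = layers.TextVectorization(max_tokens=20000, output_sequence_length=128)
vectorizer.adapt(ds.map(lambda x, y: x))

model = tf.keras.Sequential([
    vectorizer,                                    # string -> integer sequence
    layers.Embedding(20000, 64),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dense(3, activation='softmax'),         # news / sports / finance
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(ds, epochs=3)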
MIT License https://opensource.org/licenses/MIT
License information was derived automatically
Dataset Card for MNIST
Dataset Summary
The MNIST dataset consists of 70,000 28x28 black-and-white images of handwritten digits extracted from two NIST databases. There are 60,000 images in the training dataset and 10,000 images in the validation dataset, one class per digit, so 10 classes in total, with 7,000 images (6,000 train images and 1,000 test images) per class. Half of the images were drawn by Census Bureau employees and the other half by high school students… See the full description on the dataset page: https://huggingface.co/datasets/ylecun/mnist.
This is a regression task, where the aim is to predict the burned area of forest fires, in the northeast region of Portugal, by using meteorological and other data.
Data Set Information:
In [Cortez and Morais, 2007], the output 'area' was first transformed with a ln(x+1) function. Then, several Data Mining methods were applied. After fitting the models, the outputs were post-processed with the inverse of the ln(x+1) transform. Four different input setups were used. The experiments were conducted using 10-fold cross-validation x 30 runs. Two regression metrics were measured: MAD and RMSE. A Gaussian support vector machine (SVM) fed with only 4 direct weather conditions (temp, RH, wind and rain) obtained the best MAD value: 12.71 +- 0.01 (mean and 95% confidence interval using a Student's t-distribution). The best RMSE was attained by the naive mean predictor. An analysis of the regression error curve (REC) shows that the SVM model predicts more examples within a lower admitted error. In effect, the SVM model better predicts small fires, which are the majority.
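The ln(x+1) transform and its inverse mentioned above are simple to apply. A minimal NumPy sketch with made-up example values, for illustration only:
import numpy as np

area = np.array([0.0, 0.52, 10.93, 746.28])   # example 'area' values in hectares
y = np.log1p(area)                            # ln(x + 1) transform applied before fitting
restored = np.expm1(y)                        # inverse transform applied to predictions
print(np.allclose(restored, area))            # True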
Attribute Information:
For more information, read [Cortez and Morais, 2007].
To use this dataset:
import tensorflow_datasets as tfds
ds = tfds.load('forest_fires', split='train')
for ex in ds.take(4):
    print(ex)
See the guide for more information on tensorflow_datasets.
https://creativecommons.org/publicdomain/zero/1.0/
The dataset is designed to train and evaluate AI models for tasks like art classification, aesthetic analysis, and personalized creative guidance in educational environments. These categories reflect a wide spectrum of visual and material art, allowing for nuanced learning outcomes that blend creativity, cultural sensitivity, and technical analysis.
All images are organized into structured training and validation folders, making the dataset ideal for deep learning applications using frameworks like TensorFlow or PyTorch.
This dataset contains a curated collection of approximately 9,000 high-quality images across five distinct categories of visual art: Drawings, Paintings, Sculptures, Engravings, and Iconography. Designed to support research in art classification, cultural heritage analysis, and interdisciplinary art education, the dataset serves as a valuable resource for both academic and applied machine learning studies.
📁 Structure The dataset is organized into clearly labeled directories, suitable for use with deep learning frameworks:
drawings/: Includes pencil, ink, and watercolor illustrations
painting/: Features works in oil, acrylic, and other painting media
sculpture/: Covers 3D artworks including stone, bronze, and mixed media
engraving/: Includes etchings, lithographs, and other graphic techniques
iconography/: Focuses on historical and religious art, especially Old Russian
⭐ Key Features Five Major Art Categories: Includes Drawings, Paintings, Sculptures, Engravings, and Iconography (traditional cultural art).
~9,000 Curated Images: High-quality images collected from sources such as Google Images, Yandex, and cultural art databases.
Ready for Deep Learning: Structured into training_set/ folders, compatible with TensorFlow, Keras, and PyTorch workflows.
Cultural Heritage Emphasis: Especially suitable for educational AI systems aiming to blend aesthetic learning with historical context.
Supports Design Thinking-Based Learning: Enables interactive, project-based exploration of art classification and creative analysis.
Use Case: Designed for use in projects involving art education, cultural heritage classification, and intelligent learning systems.
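A minimal PyTorch loading sketch for the directory layout described above; the root folder name training_set/, the image size, and the transforms are assumptions for illustration:
import torch
from torchvision import datasets, transforms

tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
train_data = datasets.ImageFolder('training_set', transform=tfm)
loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)

print(train_data.classes)            # folder names, e.g. ['drawings', 'engraving', ...]
images, labels = next(iter(loader))
print(images.shape, labels[:8])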
Attribution 4.0 (CC BY 4.0) https://creativecommons.org/licenses/by/4.0/
License information was derived automatically
EfficientDet (PyTorch)
This is a work-in-progress PyTorch implementation of EfficientDet.
It is based on the official Tensorflow implementation by Mingxing Tan and the Google Brain team, and on the paper by Mingxing Tan, Ruoming Pang, Quoc V. Le: "EfficientDet: Scalable and Efficient Object Detection". I am aware there are other PyTorch implementations. Their approach didn't fit well with my aim to replicate the Tensorflow models closely enough to allow weight ports while still maintaining a PyTorch feel and a high degree of flexibility for future additions. So, this is built from scratch and leverages my previous EfficientNet work.
Updates / Tasks
2020-04-15: Taking a pause on training; some high-priority things came up. There are signs of life on the training branch: the basic augs were being worked on before the priority switch, the loss fn appeared to be doing something sane with distributed training working, no proper eval yet, init not correct yet. I will get to it, with SOTA training config and good performance as the end goal (as with my EfficientNet work).
2020-04-11 Cleanup post-processing. Less code and a five-fold throughput increase on the smaller models. D0 running > 130 img/s on a single 2080Ti, D1 > 130 img/s on dual 2080Ti up to D7 @ 8.5 img/s.
2020-04-10 Replace generate_detections with PyTorch impl using torchvision batched_nms. Significant performance increase with minor (+/-.001 mAP) score differences. Quite a bit faster than original TF impl on a GPU now.
2020-04-09 Initial code with working validation posted. Yes, it's a little slow, but I think faster than the official impl on a GPU if you leave AMP enabled. Post processing needs some love.
Core Tasks
* Feature extraction from my EfficientNet implementations (https://github.com/rwightman/gen-efficientnet-pytorch or https://github.com/rwightman/pytorch-image-models)
* Low level blocks / helpers (SeparableConv, create_pool2d (same padding), etc)
* PyTorch implementation of BiFPN, BoxNet, ClassNet modules and related submodules
* Port Tensorflow checkpoints to PyTorch -- initial D1 checkpoint converted, state_dict loaded, on to validation....
* Basic MS COCO validation script
* Temporary (hacky) COCO dataset and transform
* Port reference TF anchor and object detection code
* Verify model output sanity
* Integrate MSCOCO eval metric calcs
* Some cleanup, testing
* Submit to test-dev server, all good
* Add torch hub support and pretrained URL based weight download
* Change module dependencies from 'timm' to minimal 'geffnet' for backbone, bring some of the layers here -- leaving as timm for now, as the training code will use many timm functions that I leverage to reproduce SOTA EfficientNet training in PyTorch
* Remove redundant bias layers that exist in the official impl and weights
* Add visualization support
* Performance improvements, numpy TF detection code -> optimized PyTorch
* Verify/fix Torchscript and ONNX export compatibility
Possible Future Tasks
* Training (object detection) reimplementation w/ Rand/AutoAugment, etc
* Training (semantic segmentation) experiments
* Integration with Detectron2 / MMDetection codebases
* Addition and cleanup of EfficientNet based U-Net and DeepLab segmentation models that I've used in past projects
* Addition and cleanup of OpenImages dataset/training support from a past project
* Exploration of instance segmentation possibilities...
If you are an organization interested in sponsoring any of this work, or if prioritization of the possible future directions interests you, feel free to contact me (issue, LinkedIn, Twitter, hello at rwightman dot com). I will set up a GitHub sponsor if there is any interest.
Models
Variant | Download | mAP (val2017) | mAP (test-dev2017) | mAP (Tensorflow official test-dev2017)
D0 | tf_efficientdet_d0.pth | 32.8 | TBD | 33.8
D1 | tf_efficientdet_d1.pth | 38.5 | TBD | 39.6
D2 | tf_efficientdet_d2.pth | 42.0 | 42.5 | 43
D3 | tf_efficientdet_d3.pth | 45.3 | TBD | 45.8
D4 | tf_efficientdet_d4.pth | 48.3 | TBD | 49.4
D5 | tf_efficientdet_d5.pth | 49.6 | TBD | 50.7
D6 | tf_efficientdet_d6.pth | 50.6 | TBD | 51.7
D7 | tf_efficientdet_d7.pth | 50.9 | 51.2 | 52.2
Usage
Environment Setup
Tested in a Python 3.7 or 3.8 conda environment in Linux with:
* PyTorch 1.4
* PyTorch Image Models (timm) 0.1.20, pip install timm or local install from (https://github.com/rwightman/pytorch-image-models)
* Apex AMP master (as of 2020-04)
NOTE: There is a conflict/bug with Numpy 1.18+ and pycocotools; force install numpy <= 1.17.5 or the coco eval will fail. The validation script will still save the output JSON, and that can be run through eval again later.
Dataset Setup
MSCOCO 2017 validation data:
wget http://images.cocodataset.org/zips/val2017.zip
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
unzip val2017.zip
unzip annotations_trainval2017.zip
MSCOCO 2017 test-dev data:
wget http://images.cocodataset.org/zips/test2017.zip
unzip -q test2017.zip
wget http://images.cocodat...
https://creativecommons.org/publicdomain/zero/1.0/
This is a landscape classification dataset. The data consists of 5 different classes, each representing a kind of landscape. These classes are:
* Coast - images belonging to coastal areas, or simply beaches.
* Desert - images of desert areas such as the Sahara, the Thar, etc.
* Forest - images belonging to forest areas such as the Amazon.
* Glacier - some amazing white images belonging to glaciers, for example the Antarctic.
* Mountains - the world from the top, i.e. mountain areas such as the Himalayas.
The data is first divided into 3 subdirectories: training, validation, and testing. Another directory of TensorFlow records is also included, which is further divided into training, validation, and testing directories containing the TensorFlow records of these images. This allows you to load the data either with an Image Data Generator or from the TensorFlow records.
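A minimal loading sketch for the directory-based route; the directory names and target size are assumptions for illustration:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1.0 / 255)
train_gen = datagen.flow_from_directory('landscape/train', target_size=(150, 150),
                                        batch_size=32, class_mode='categorical')
val_gen = datagen.flow_from_directory('landscape/validation', target_size=(150, 150),
                                      batch_size=32, class_mode='categorical')
print(train_gen.class_indices)   # e.g. {'Coast': 0, 'Desert': 1, 'Forest': 2, ...}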
From my perspective, for any model to perform well on this dataset, the model should have proper knowledge of both the colors and the geometry of the image, because when colors and geometry come together, they make up a landscape.
Cityscapes is a dataset consisting of diverse urban street scenes across 50 different cities at varying times of the year as well as ground truths for several vision tasks including semantic segmentation, instance level segmentation (TODO), and stereo pair disparity inference.
For segmentation tasks (default split, accessible via 'cityscapes/semantic_segmentation'), Cityscapes provides dense pixel level annotations for 5000 images at 1024 * 2048 resolution pre-split into training (2975), validation (500) and test (1525) sets. Label annotations for segmentation tasks span across 30+ classes commonly encountered during driving scene perception. Detailed label information may be found here: https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py#L52-L99
Cityscapes also provides coarse grain segmentation annotations (accessible via 'cityscapes/semantic_segmentation_extra') for 19998 images in a 'train_extra' split which may prove useful for pretraining / data-heavy models.
Besides segmentation, cityscapes also provides stereo image pairs and ground truths for disparity inference tasks on both the normal and extra splits (accessible via 'cityscapes/stereo_disparity' and 'cityscapes/stereo_disparity_extra' respectively).
Ignored examples:
WARNING: this dataset requires users to setup a login and password in order to get the files.
To use this dataset:
import tensorflow_datasets as tfds
ds = tfds.load('cityscapes', split='train')
for ex in ds.take(4):
    print(ex)
See the guide for more information on tensorflow_datasets.
https://creativecommons.org/publicdomain/zero/1.0/
FER2013 (Facial Expression Recognition 2013) dataset is a widely used dataset for training and evaluating facial expression recognition models. Here are key details about the FER2013 dataset:
Overview:
FER2013 is a dataset designed for facial expression recognition tasks, particularly the classification of facial expressions into seven different emotion categories. The dataset was introduced for the Emotion Recognition in the Wild (EmotiW) Challenge in 2013.
Emotion Categories:
The dataset consists of images labeled with seven emotion categories: Angry, Disgust, Fear, Happy, Sad, Surprise, and Neutral.
Image Size:
Each image in the FER2013 dataset is grayscale and has a resolution of 48x48 pixels.
Number of Images:
The dataset contains a total of 35,887 labeled images, with approximately 5,000 images per emotion category.
Partitioning:
FER2013 is often divided into training, validation, and test sets. The original split has 28,709 images for training, 3,589 images for validation, and 3,589 images for testing.
Usage in Research:
FER2013 has been widely used in research for benchmarking and training facial expression recognition models, particularly deep learning models. It provides a standard dataset for evaluating the performance of models on real-world facial expressions.
Challenges:
The FER2013 dataset is known for its relatively simple and posed facial expressions. In real-world scenarios, facial expressions can be more complex and spontaneous, and there are datasets addressing these challenges.
Challenges and Criticisms:
Some criticisms of the dataset include its relatively small size, limited diversity in facial expressions, and the fact that some expressions (e.g., "Disgust") are challenging to recognize accurately.
This pre-trained machine learning model implements a Convolutional Neural Network (CNN) for emotion detection using the TensorFlow and Keras frameworks. The model architecture includes convolutional layers, batch normalization, and dropout for effective feature extraction and classification. The training process utilizes an ImageDataGenerator for data augmentation, enhancing the model's ability to generalize to various facial expressions.
Key Steps:
Model Training: The CNN model is trained on an emotion dataset using an ImageDataGenerator for dynamic data augmentation. Training is performed over a specified number of epochs with a reduced batch size for efficient learning.
Model Checkpoint: ModelCheckpoint is employed to save the best-performing model during training, ensuring that the most accurate model is retained.
Save Model and Memory Cleanup: The trained model is saved in both HDF5 and JSON formats. Memory is efficiently managed by deallocating resources, clearing the Keras session, and performing garbage collection.
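A minimal sketch of the workflow in the key steps above; the layer stack, augmentation parameters, directory layout, and file names are assumptions for illustration, not the original model:
import gc
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint

datagen = ImageDataGenerator(rescale=1.0 / 255, rotation_range=10,
                             horizontal_flip=True, validation_split=0.1)
train_gen = datagen.flow_from_directory('fer2013/train', target_size=(48, 48),
                                        color_mode='grayscale', batch_size=32,
                                        class_mode='categorical', subset='training')
val_gen = datagen.flow_from_directory('fer2013/train', target_size=(48, 48),
                                      color_mode='grayscale', batch_size=32,
                                      class_mode='categorical', subset='validation')

model = models.Sequential([
    layers.Conv2D(32, 3, activation='relu', input_shape=(48, 48, 1)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(7, activation='softmax'),        # seven emotion categories
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

checkpoint = ModelCheckpoint('best_emotion_model.h5', monitor='val_accuracy',
                             save_best_only=True)
model.fit(train_gen, validation_data=val_gen, epochs=20, callbacks=[checkpoint])

# Save in HDF5 and JSON formats, then free memory, as described above.
model.save('emotion_model.h5')
with open('emotion_model.json', 'w') as f:
    f.write(model.to_json())
tf.keras.backend.clear_session()
gc.collect()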
MIT License https://opensource.org/licenses/MIT
License information was derived automatically
Download and Extract:
Download the dataset from Kaggle.
Extract the ZIP file if needed; images are organized into folders, where each folder name is the class label (like snake, lizard, frog, etc.).
Understand the Structure:
The dataset contains 9 major classes of reptiles and amphibians.
Each class folder contains multiple high-quality images belonging to that species or group.
Load the Dataset into Your Project:
If using PyTorch, use torchvision.datasets.ImageFolder to load images directly.
If using TensorFlow, use tf.keras.utils.image_dataset_from_directory.
You can also manually read images using OpenCV or PIL if needed.
Preprocessing:
Resize images if needed (e.g., 224x224 for ResNet models).
Normalize pixel values (e.g., divide by 255) to prepare for training.
Splitting the Data:
Optionally split the dataset into train, validation, and test sets.
You can split randomly or based on a percentage (e.g., 80% training, 20% validation/testing).
Training Your Model:
You can use any CNN model like ResNet, MobileNet, EfficientNet, etc.
Fine-tune pre-trained models using transfer learning for faster results.
Use the class folders for automatic label generation.
Handling Easily:
Use batch processing and data augmentation (flip, rotate, zoom) during training.
Use GPU if available for faster training.
Keep your classes in a list if needed for mapping predictions back to names.
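Pulling the loading, preprocessing, and splitting steps above together in TensorFlow, as a minimal sketch; the dataset path, split fraction, and image size are assumptions for illustration:
import tensorflow as tf

train_ds = tf.keras.utils.image_dataset_from_directory(
    'reptiles_amphibians', validation_split=0.2, subset='training',
    seed=42, image_size=(224, 224), batch_size=32)
val_ds = tf.keras.utils.image_dataset_from_directory(
    'reptiles_amphibians', validation_split=0.2, subset='validation',
    seed=42, image_size=(224, 224), batch_size=32)

class_names = train_ds.class_names                       # folder names become labels
train_ds = train_ds.map(lambda x, y: (x / 255.0, y))     # normalize pixel values
val_ds = val_ds.map(lambda x, y: (x / 255.0, y))
print(class_names)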
A big thank you to my GitHub Sponsors for their support!
In addition to the sponsors at the link above, I've received hardware and/or cloud resources from:
* Nvidia (https://www.nvidia.com/en-us/)
* TFRC (https://www.tensorflow.org/tfrc)
I'm fortunate to be able to dedicate significant time and money of my own to supporting this and other open source projects. However, as the projects increase in scope, outside support is needed to continue with the current trajectory of hardware, infrastructure, and electricity costs.
* timm bits branch)
* .data, a bit more consistency, unit tests for all!
* efficientnetv2_rw_t weights, a custom 'tiny' 13.6M param variant that is a bit better than (non NoisyStudent) B3 models. Both faster and better accuracy (at same or lower res)
* vit_base_patch16_sam_224) and B/32 (vit_base_patch32_sam_224) models.
* jx_nest_base - 83.534, jx_nest_small - 83.120, jx_nest_tiny - 81.426
* gmlp_s16_224 trained to 79.6 top-1, matching paper. Hparams for this and other recent MLP training here
* vit_large_patch16_384 (87.1 top-1), vit_large_r50_s32_384 (86.2 top-1), vit_base_patch16_384 (86.0 top-1)
* vit_deit_* renamed to just deit_*
* gmixer_24_224 MLP /w GLU, 78.1 top-1 w/ 25M params.
* eca_nfnet_l2 weights from my 'lightweight' series. 84.7 top-1 at 384x384.