Presentation
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from xml.etree import ElementTree as et
import torch
import torchvision
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
%matplotlib inline

print(torch.__version__)

class FruitImagesDataset(torch.utils.data.Dataset):
    def __init__(self, files_dir, width, height, transforms=None):
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width
        self.imgs = [image for image in sorted(os.listdir(files_dir)) if image[-4:] == '.jpg']
        self.classes = ['_', 'apple', 'banana', 'orange']  # index 0 is the background class

    def __getitem__(self, idx):
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)
        img = cv2.imread(image_path)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img_res = cv2.resize(img_rgb, (self.width, self.height), interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        annot_filename = img_name[:-4] + '.xml'
        annot_file_path = os.path.join(self.files_dir, annot_filename)
        boxes = []
        labels = []
        tree = et.parse(annot_file_path)
        root = tree.getroot()

        # Rescale the Pascal VOC boxes from the original image size to the resized size
        wt = img.shape[1]
        ht = img.shape[0]
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))
            xmin = int(member.find('bndbox').find('xmin').text)
            xmax = int(member.find('bndbox').find('xmax').text)
            ymin = int(member.find('bndbox').find('ymin').text)
            ymax = int(member.find('bndbox').find('ymax').text)
            xmin_corr = (xmin / wt) * self.width
            xmax_corr = (xmax / wt) * self.width
            ymin_corr = (ymin / ht) * self.height
            ymax_corr = (ymax / ht) * self.height
            boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["image_id"] = torch.tensor([idx])

        if self.transforms:
            sample = self.transforms(image=img_res, bboxes=target['boxes'], labels=labels)
            img_res = sample['image']
            target['boxes'] = torch.Tensor(sample['bboxes'])

        return img_res, target

    def __len__(self):
        return len(self.imgs)

import matplotlib.patches as patches

def plot_img_bbox(img, target):
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    if isinstance(img, torch.Tensor):
        img = transforms.ToPILImage()(img).convert('RGB')
    a.imshow(img)
    boxes = target['boxes']
    if isinstance(boxes, torch.Tensor):
        boxes = boxes.tolist()
    for box in boxes:
        x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
        rect = patches.Rectangle((x, y), width, height, linewidth=2, edgecolor='r', facecolor='none')
        a.add_patch(rect)
    plt.show()

files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'
dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision import transforms

def get_object_detection_model(num_classes):
    # Load a Faster R-CNN pre-trained on COCO and replace its classifier head
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

def collate_fn(batch):
    # Detection batches carry variable-sized targets, so keep them as tuples
    return tuple(zip(*batch))

def get_transform(train):
    if train:
        return A.Compose([
            A.HorizontalFlip(p=0.5),   # pass p= explicitly; the first positional arg is always_apply
            A.VerticalFlip(p=0.3),
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    else:
        return A.Compose([
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

dataset = FruitImagesDataset(files_dir, 224, 224, transforms=get_transform(train=True))
dataset_test = FruitImagesDataset(test_dir, 224, 224, transforms=get_transform(train=False))

torch.manual_seed(1)
np.random.seed(1)
indices = np.random.permutation(len(dataset)).tolist()
test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)
train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:])

train_loader = torch.utils.data.DataLoader(train_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=5, shuffle=False, num_workers=2, collate_fn=collate_fn)
n_batches, n_batches_test = len(train_loader), len(test_loader)

images, targets = next(iter(train_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 4
model = get_object_detection_model(num_classes)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

import time
!pip install torchmetrics
from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision()

def train_model(model, data_loader=None, num_epoch=10):
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")
        time_start = time.time()
        loss_accum = 0.0
        # loss_mask_accum = 0.0
        loss_classifier_accum = 0.0
        model.train()
        for batch_idx, (images, targets) in enumerate(data_loader, 1):
            # Predict
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Logging
            # loss_mask = loss_dict['loss_mask'].item()
            loss_accum += loss.item()
            # loss_mask_accum += loss_mask
            loss_classifier_accum += loss_dict['loss_classifier'].item()
            if batch_idx % 500 == 0:
                print(f"    [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")
        lr_scheduler.step()
        # Train losses
        train_loss = loss_accum / n_batches
        # train_loss_mask = loss_mask_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches
        elapsed = time.time() - time_start
        torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin")
        prefix = f"[Epoch {epoch:2d} / {num_epoch:2d}]"
        # print(f"{prefix} Train mask-only loss: {train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}")
        print(f"{prefix} Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')
        # Validation: collect predictions and targets, then compute mAP
        preds_single = []
        targets_single = []
        for batch_idx, (images, targets) in enumerate(val_loader, 1):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            targets_single.extend(targets)
            model.eval()
            with torch.no_grad():
                pred = model(images)
            preds_single.extend(pred)
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")
    return model

# Generic training-loop skeleton (compute_loss is a placeholder, not defined here)
def train_model(model, train_loader, num_epochs):
    # Define your training loop here
    for epoch in range(num_epochs):
        # Training steps
        for batch_idx, (data, target) in enumerate(train_loader):
            # Forward pass
            output = model(data)
            # Compute loss
            loss = compute_loss(output, target)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Print training progress
            if batch_idx % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, batch_idx + 1, len(train_loader), loss.item()))
    return model

!pip install torchmetrics
import time
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()

def train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'):
    n_batches = len(train_loader)
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")
        time_start = time.time()
        loss_accum = 0.0
        loss_classifier_accum = 0.0
        model.train()
        for batch_idx, (images, targets) in enumerate(train_loader, 1):
            # Move data to device
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            # Forward pass
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Logging
            loss_accum += loss.item()
            loss_classifier_accum += loss_dict['loss_classifier'].item()
            if batch_idx % 500 == 0:
                print(f"    [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")
        lr_scheduler.step()
        # Compute average training loss
        train_loss = loss_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches
        elapsed = time.time() - time_start
        print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')
        # Validation
        preds_single = []
        targets_single = []
        model.eval()
        with torch.no_grad():
            for batch_idx, (images, targets) in enumerate(val_loader, 1):
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                targets_single.extend(targets)
                pred = model(images)
                preds_single.extend(pred)
        # Update metric and compute mAP
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")
    return model

# Instantiate your model
num_classes = 4  # Modify based on your dataset
model = get_object_detection_model(num_classes)
model.to(device)

# Define optimizer and scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Provide DataLoader for training and validation
train_loader = torch.utils.data.DataLoader(train_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)

# Train the model
num_epoch = 30  # Modify as needed
model = train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)

images, targets = next(iter(test_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])

import torchvision

def apply_threshold(prediction, threshold=0.6):
    # Non-maximum suppression: 'threshold' here is the IoU threshold for NMS
    index = torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold)
    pred = prediction.copy()
    pred['boxes'] = pred['boxes'][index]
    pred['scores'] = pred['scores'][index]
    pred['labels'] = pred['labels'][index]
    return pred

def apply_threshold_batch(prediction, threshold=0.1):
    preds = []
    for data in prediction:
        preds.append(apply_threshold(data, threshold))
    return preds

model.to(device)
model.eval()
with torch.no_grad():
    prediction = model([images[0].to(device)])
print(prediction)
filtered_preds = apply_threshold_batch(prediction, threshold=0.1)
filtered_preds

import torch

def apply_threshold_batch(predictions, threshold=0.5):
    # Score-based filtering: keep only detections above the confidence threshold
    filtered_preds = []
    for pred in predictions:
        boxes = pred['boxes']
        labels = pred['labels']
        scores = pred['scores']
        # Apply thresholding
        mask = scores > threshold
        filtered_boxes = boxes[mask]
        filtered_labels = labels[mask]
        filtered_scores = scores[mask]
        # Construct filtered prediction dictionary
        filtered_pred = {
            'boxes': filtered_boxes,
            'labels': filtered_labels,
            'scores': filtered_scores
        }
        filtered_preds.append(filtered_pred)
    return filtered_preds

# Example prediction
prediction = [{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]),
               'labels': torch.tensor([1, 2]),
               'scores': torch.tensor([0.8, 0.6])}]
# Apply threshold
filtered_preds = apply_threshold_batch(prediction, threshold=0.7)
print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0])

prepare complete ppt presentation for this with full information
ChatGPT
Slide 1: Title Slide
Date: [Date]
Slide 2: Introduction
Objectives
Objective: To build and train an object detection model for fruits using Faster R-
CNN.
Dataset: Contains images of apples, bananas, and oranges.
Key Functions
Code Snippet
def get_transform(train):
    if train:
        return A.Compose([...], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    else:
        return A.Compose([...], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
Code Snippet
train_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
Code Snippet
def get_object_detection_model(num_classes):
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
Code Snippet
def train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'):
    for epoch in range(1, num_epoch + 1):
        ...
    return model
Code Snippet
from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision()
Slide 10: Training Results
Loss and mAP
Example Output
[Epoch 10 / 30] Train loss: 0.123 [300 secs] | Val mAP: 0.78
Function: plot_img_bbox
Visualization: Shows bounding boxes on images.
Code Snippet
def plot_img_bbox(img, target):
...
Code Snippet
def apply_threshold(prediction, threshold=0.6):
...
def apply_threshold_batch(prediction, threshold=0.1):
...
Visualization
Next Steps
Feel free to adjust and add more details as necessary to fit the presentation needs.
please expand the bullet points for all slides
ChatGPT
Certainly! Here's an expanded version of the PowerPoint presentation with detailed bullet
points for each slide.
Slide 1: Title Slide
Date: [Date]
Slide 2: Introduction
Objectives
Objective:
o Develop a robust object detection model to identify and classify fruits.
o Utilize the Faster R-CNN framework for detection tasks.
Dataset:
o A collection of images containing apples, bananas, and oranges.
o Each image has corresponding annotation files marking the bounding boxes
and labels for the fruits.
Data Directory:
o Organize images and annotations within a specific directory structure.
o Images are stored in JPG format.
Annotations:
o Annotations provided in XML format.
o XML files adhere to the Pascal VOC structure, containing details about
bounding boxes and labels.
Classes:
o Defined classes: apple, banana, orange.
o The class index mapping: {0: background, 1: apple, 2: banana, 3: orange}.
Key Functions
__init__:
o Initialize the dataset with the image directory, target image dimensions, and
transformations.
o Load image filenames and define class labels.
__getitem__:
o Load and preprocess image and corresponding annotation for a given index.
o Convert bounding box coordinates to match the target image dimensions.
__len__:
o Return the total number of images in the dataset.
Training Transformations:
o Apply random horizontal and vertical flips to augment the dataset.
o Normalize and convert images to tensor format.
Validation Transformations:
o Normalize and convert images to tensor format without augmentation.
Code Snippet
def get_transform(train):
    if train:
        return A.Compose([...], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    else:
        return A.Compose([...], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
Training Data:
o Split the dataset into training and validation subsets using a specified ratio.
Validation Data:
o A separate dataset for evaluating the model performance.
Batch Size:
o Set batch size to 5 for both training and validation loaders.
Code Snippet
train_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
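The custom collate_fn is needed because each image carries a different number of boxes, so batches cannot be stacked into one tensor; a toy sketch of what tuple(zip(*batch)) produces:

batch = [('img0', {'boxes': 2}), ('img1', {'boxes': 5})]  # stand-ins for (image, target) pairs
images, targets = tuple(zip(*batch))
print(images)   # ('img0', 'img1')
print(targets)  # ({'boxes': 2}, {'boxes': 5})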
Pre-trained:
o Initialize Faster R-CNN with a ResNet50 backbone pre-trained on COCO
dataset.
Customization:
o Modify the classifier head to match the number of classes in our fruit dataset.
Code Snippet
def get_object_detection_model(num_classes):
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
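In newer torchvision releases (0.13+), the pretrained flag is deprecated in favor of explicit weights enums; an equivalent call, assuming such a version is installed, would be:

from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)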
Epochs:
o Train the model for a specified number of epochs.
Loss Calculation:
o Compute the total loss by summing individual loss components provided by
the model.
Optimizer:
o Use Stochastic Gradient Descent (SGD) with momentum and weight decay for
optimization.
Code Snippet
def train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'):
    for epoch in range(1, num_epoch + 1):
        ...
    return model
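Inside the loop, the model computes its own losses when given targets; the total loss used for backpropagation sums the components (the key names below are the ones torchvision's Faster R-CNN returns in training mode):

loss_dict = model(images, targets)
# loss_dict keys: 'loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg'
loss = sum(loss for loss in loss_dict.values())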
Metric:
o Mean Average Precision (mAP) is a common evaluation metric for object
detection models.
o Measures the accuracy of bounding box predictions and class labels.
Implementation:
o Use the torchmetrics library to compute mAP during validation.
Code Snippet
from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision()
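As a sanity check, the update/compute API can be exercised on a toy prediction (a minimal sketch with one perfectly matched box):

import torch
preds = [{'boxes': torch.tensor([[10., 10., 50., 50.]]),
          'scores': torch.tensor([0.9]),
          'labels': torch.tensor([1])}]
target = [{'boxes': torch.tensor([[10., 10., 50., 50.]]),
           'labels': torch.tensor([1])}]
metric.update(preds, target)
print(metric.compute()['map'])  # perfect overlap -> mAP of 1.0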
Loss:
o Track and display the training loss over each epoch.
mAP:
o Calculate and display the mean Average Precision on the validation set.
Example Output
[Epoch 10 / 30] Train loss: 0.123 [300 secs] | Val mAP: 0.78
Function:
o Define a function plot_img_bbox to visualize bounding boxes on images.
Visualization:
o Display images with bounding boxes drawn around detected objects.
Code Snippet
def plot_img_bbox(img, target):
...
Code Snippet
def apply_threshold(prediction, threshold=0.6):
...
def apply_threshold_batch(prediction, threshold=0.1):
...
Original Predictions:
o Predictions before applying the thresholding and NMS.
Filtered Predictions:
o Predictions after applying thresholding and NMS to filter out lower confidence
detections.
Visualization
Show images with bounding boxes drawn around detected objects, highlighting the
difference between original and filtered predictions.
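Note that the threshold passed to torchvision.ops.nms in apply_threshold is an IoU threshold (a box is dropped when it overlaps a higher-scoring box by more than this), not a score cut-off; a toy example:

import torch
import torchvision

boxes = torch.tensor([[0., 0., 100., 100.],
                      [10., 10., 110., 110.],    # IoU ~0.68 with the first box
                      [200., 200., 300., 300.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = torchvision.ops.nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): the overlapping lower-score box is suppressed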
Model Training:
o Successfully trained an object detection model for identifying apples, bananas,
and oranges.
Evaluation:
o Evaluated the model using mAP, showing reasonable performance.
Visualization:
o Demonstrated the effectiveness of the model through visual examples.
Next Steps
Fine-Tuning:
o Further fine-tune the model to improve accuracy.
Augmentation:
o Explore additional data augmentation techniques to enhance model robustness.
Open Floor:
o Invite the audience to ask questions and provide clarifications on any part of
the presentation.
Discussion:
o Engage in discussions about potential improvements, use cases, and future
directions for the project.
Feel free to modify and add more specific details, code snippets, and visual aids such as
charts and graphs to enhance the presentation.
explain the topic through this code
ChatGPT
Certainly! Here’s a detailed explanation of the provided code, covering the main aspects of
the fruit object detection project using PyTorch and Faster R-CNN:
Overview
This code implements a fruit object detection model using PyTorch and Faster R-CNN. The
primary goal is to detect and classify fruits (apples, bananas, and oranges) in images.
Import Libraries
First, essential libraries are imported:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from xml.etree import ElementTree as et
import torch
import torchvision
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
%matplotlib inline
print(torch.__version__)
Dataset Class
The FruitImagesDataset class handles loading and preprocessing of the dataset:
class FruitImagesDataset(torch.utils.data.Dataset):
    def __init__(self, files_dir, width, height, transforms=None):
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width
        self.imgs = [image for image in sorted(os.listdir(files_dir)) if image.endswith('.jpg')]
        self.classes = ['_', 'apple', 'banana', 'orange']

    def __getitem__(self, idx):
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)
        img = cv2.imread(image_path)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img_res = cv2.resize(img_rgb, (self.width, self.height), interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        annot_file_path = os.path.join(self.files_dir, img_name[:-4] + '.xml')
        boxes = []
        labels = []
        tree = et.parse(annot_file_path)
        root = tree.getroot()

        wt = img.shape[1]
        ht = img.shape[0]
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))
            xmin = int(member.find('bndbox').find('xmin').text)
            xmax = int(member.find('bndbox').find('xmax').text)
            ymin = int(member.find('bndbox').find('ymin').text)
            ymax = int(member.find('bndbox').find('ymax').text)
            xmin_corr = (xmin / wt) * self.width
            xmax_corr = (xmax / wt) * self.width
            ymin_corr = (ymin / ht) * self.height
            ymax_corr = (ymax / ht) * self.height
            boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["image_id"] = torch.tensor([idx])

        if self.transforms:
            sample = self.transforms(image=img_res, bboxes=target['boxes'], labels=labels)
            img_res = sample['image']
            target['boxes'] = torch.Tensor(sample['bboxes'])

        return img_res, target

    def __len__(self):
        return len(self.imgs)
Methods:
__init__: Initializes the dataset with image directory, target dimensions, and
transformations.
__getitem__: Loads and preprocesses an image and its annotations for a given index.
__len__: Returns the total number of images in the dataset.
Data Visualization
A function to plot images with bounding boxes:
import matplotlib.patches as patches

def plot_img_bbox(img, target):
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    if isinstance(img, torch.Tensor):
        img = transforms.ToPILImage()(img).convert('RGB')
    a.imshow(img)
    boxes = target['boxes']
    if isinstance(boxes, torch.Tensor):
        boxes = boxes.tolist()
    for box in boxes:
        x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
        rect = patches.Rectangle((x, y), width, height, linewidth=2, edgecolor='r', facecolor='none')
        a.add_patch(rect)
    plt.show()
Dataset Initialization
files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'
dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')
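A single sample can be pulled out directly to verify the preprocessing (a small sketch using the dataset just created):

img, target = dataset[0]
print(target['boxes'])   # box coordinates rescaled to 224x224
print(target['labels'])  # indices into self.classes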
Model Definition
The Faster R-CNN detector with a ResNet50 backbone, with its classifier head replaced for our classes:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def get_object_detection_model(num_classes):
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
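Note that num_classes includes the background class, so three fruit classes require four outputs:

model = get_object_detection_model(num_classes=4)  # background + apple + banana + orange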
Data Transformations
Albumentations pipelines for training (with random flips) and validation:
def get_transform(train):
    if train:
        return A.Compose([
            A.HorizontalFlip(p=0.5),   # pass p= explicitly; the first positional arg is always_apply
            A.VerticalFlip(p=0.3),
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    else:
        return A.Compose([
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
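Applying the pipeline to one sample looks like this (a sketch; img_np stands for the HxWx3 float image the dataset produces):

transform = get_transform(train=True)
sample = transform(image=img_np, bboxes=[[10.0, 20.0, 100.0, 120.0]], labels=[1])
img_tensor = sample['image']  # CxHxW tensor produced by ToTensorV2
new_boxes = sample['bboxes']  # coordinates follow any flips applied to the image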
Data Loader
Creating data loaders for training and validation:
torch.manual_seed(1)
np.random.seed(1)
indices = np.random.permutation(len(dataset)).tolist()
test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)
train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:])

train_loader = torch.utils.data.DataLoader(train_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=5, shuffle=False, num_workers=2, collate_fn=collate_fn)
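Because collate_fn keeps batches as tuples rather than stacked tensors, one batch can be inspected like this:

images, targets = next(iter(train_loader))
print(len(images))                # 5 images per batch
print(targets[0]['boxes'].shape)  # [N, 4] boxes for the first image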
Training and Evaluation
The training loop moves each batch to the device, sums the loss dictionary returned by the model, steps the optimizer, and then computes validation mAP:

import time
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()

def train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'):
    n_batches = len(train_loader)
    for epoch in range(1, num_epoch + 1):
        time_start = time.time()
        loss_accum = 0.0
        model.train()
        for images, targets in train_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_accum += loss.item()
        lr_scheduler.step()
        train_loss = loss_accum / n_batches
        elapsed = time.time() - time_start
        print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')
        # Validation: gather predictions and targets, then compute mAP
        preds_single = []
        targets_single = []
        model.eval()
        with torch.no_grad():
            for images, targets in val_loader:
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                targets_single.extend(targets)
                preds_single.extend(model(images))
        metric.update(preds_single, targets_single)
        print(f"Val mAP: {metric.compute()['map']}")
    return model
prepare ppt presentation for this
ChatGPT
To prepare a PowerPoint presentation for this code, you'll want to follow a structured
approach to present the key components and processes involved. Here's a suggested outline
for your presentation:
Slide 1: Title Slide
Slide 2: Introduction
Slide 3: Problem Statement
Slide 4: Dataset and Preprocessing
Slide 5: Model Architecture
Slide 6: Training Setup
Slide 7: Training Process
Slide 8: Results
Slide 9: Thresholding and Non-Maximum Suppression
Slide 10: Conclusion
Slide 11: References
Slide 12: Q&A
This outline provides a structured framework for your presentation, covering the main aspects
of your object detection project implemented in PyTorch. You can customize each slide with
relevant content, code snippets, visualizations, and explanations to effectively communicate
your work to your audience.
Slide 2: Introduction
Brief overview of object detection
Importance in computer vision tasks
Introduction to PyTorch for deep learning

Slide 3: Problem Statement
Dataset: Fruit Images Dataset
Goal: Detect and classify fruits in images
Challenges: Varying sizes, multiple classes

Slide 4: Dataset and Preprocessing
Description of the Fruit Images Dataset
Preprocessing steps:
  Loading images and annotations
  Resizing images and bounding boxes
  Data augmentation

Slide 5: Model Architecture
Introduction to Faster R-CNN
Overview of the architecture
Explanation of key components:
  Backbone (ResNet50)
  Region Proposal Network (RPN)
  Region of Interest (ROI) Pooling
  Fast R-CNN Head
  Loss functions

Slide 6: Training Setup
Splitting dataset into train, validation, and test sets
Data loaders for efficient training
Model initialization and optimization:
  Optimizer: SGD with momentum
  Learning rate scheduler

Slide 7: Training Process
Iterative training loop:
  Forward pass
  Loss computation
  Backpropagation
  Optimization
Monitoring training loss and validation mAP
Evaluation metrics: Mean Average Precision (mAP)

Slide 8: Results
Sample predictions on test images
Visualizations of detected fruits with bounding boxes
Discussion on model performance and improvements

Slide 9: Thresholding and Non-Maximum Suppression
Explanation of thresholding to filter detections
Non-Maximum Suppression (NMS) for post-processing
Implementation details in PyTorch

Slide 10: Conclusion
Recap of key points covered
Summary of achievements and challenges
Future directions for improvement

Slide 11: References
List of relevant papers, libraries, and resources
Credits for datasets and code snippets used

Slide 12: Q&A
Open floor for questions and discussions
ChatGPT
Your presentation outline looks great! It provides a clear structure for presenting the key
components of your object detection project implemented in PyTorch. Each slide covers
important aspects, from the introduction to the conclusion, and includes opportunities for
audience engagement during the Q&A session.
If you have any specific content or details you'd like to include on each slide or if you need
assistance with any particular part of the presentation, feel free to let me know!
3.5
mport os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch._version_) class FruitImagesDataset(torch.utils.data.Dataset): def _init_(self,
files_dir, width, height, transforms=None): self.transforms = transforms self.files_dir =
files_dir self.height = height self.width = width self.imgs = [image for image in
sorted(os.listdir(files_dir)) if image[-4:]=='.jpg'] self.classes = [_, 'apple','banana','orange'] def
_getitem_(self, idx): img_name = self.imgs[idx] image_path = os.path.join(self.files_dir,
img_name) img = cv2.imread(image_path) img_rgb = cv2.cvtColor(img,
cv2.COLOR_BGR2RGB).astype(np.float32) img_res = cv2.resize(img_rgb, (self.width,
self.height), cv2.INTER_AREA) img_res /= 255.0 annot_filename = img_name[:-4] + '.xml'
annot_file_path = os.path.join(self.files_dir, annot_filename) boxes = [] labels = [] tree =
et.parse(annot_file_path) root = tree.getroot() wt = img.shape[1] ht = img.shape[0] for
member in root.findall('object'): labels.append(self.classes.index(member.find('name').text))
xmin = int(member.find('bndbox').find('xmin').text) xmax =
int(member.find('bndbox').find('xmax').text) ymin =
int(member.find('bndbox').find('ymin').text) ymax =
int(member.find('bndbox').find('ymax').text) xmin_corr = (xmin/wt)*self.width xmax_corr =
(xmax/wt)*self.width ymin_corr = (ymin/ht)*self.height ymax_corr = (ymax/ht)*self.height
boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr]) boxes =
torch.as_tensor(boxes, dtype=torch.float32) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] -
boxes[:, 0]) iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64) labels =
torch.as_tensor(labels, dtype=torch.int64) target = {} target["boxes"] = boxes target["labels"]
= labels target["area"] = area target["iscrowd"] = iscrowd image_id = torch.tensor([idx])
target["image_id"] = image_id if self.transforms: sample = self.transforms(image = img_res,
bboxes = target['boxes'], labels = labels) img_res = sample['image'] target['boxes'] =
torch.Tensor(sample['bboxes']) return img_res, target def _len_(self): return len(self.imgs)
import matplotlib.patches as patches

def plot_img_bbox(img, target):
    # Draw the bounding boxes in `target` on top of the image
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    if isinstance(img, torch.Tensor):
        img = transforms.ToPILImage()(img).convert('RGB')
    a.imshow(img)
    boxes = target['boxes']
    if isinstance(boxes, torch.Tensor):
        boxes = boxes.tolist()
    for box in boxes:
        x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
        rect = patches.Rectangle((x, y), width, height,
                                 linewidth=2, edgecolor='r', facecolor='none')
        a.add_patch(rect)
    plt.show()

files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'
dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision import transforms

def get_object_detection_model(num_classes):
    # Start from a Faster R-CNN model pre-trained on COCO and swap in a
    # new box predictor sized for our number of classes
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

def collate_fn(batch):
    # Images in a detection batch carry different numbers of boxes, so keep
    # them as tuples instead of stacking into one tensor
    return tuple(zip(*batch))

def get_transform(train):
    if train:
        return A.Compose([
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.3),
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    else:
        return A.Compose([
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

dataset = FruitImagesDataset(files_dir, 224, 224, transforms=get_transform(train=True))
dataset_test = FruitImagesDataset(test_dir, 224, 224, transforms=get_transform(train=False))

torch.manual_seed(1)
np.random.seed(1)
indices = np.random.permutation(len(dataset)).tolist()
test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)
train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:])

train_loader = torch.utils.data.DataLoader(train_data, batch_size=5, shuffle=True,
                                           num_workers=2, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True,
                                         num_workers=2, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=5, shuffle=False,
                                          num_workers=2, collate_fn=collate_fn)

n_batches, n_batches_test = len(train_loader), len(test_loader)
images, targets = next(iter(train_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])
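A note on collate_fn: PyTorch's default collate tries to stack every field of the batch into one tensor, which fails here because each image has a different number of boxes. tuple(zip(*batch)) simply regroups the list of (image, target) pairs into a tuple of images and a tuple of targets. A minimal, self-contained illustration with dummy values:

batch = [('img0', {'boxes': [1]}), ('img1', {'boxes': [1, 2]})]
images, targets = tuple(zip(*batch))
print(images)   # ('img0', 'img1')
print(targets)  # ({'boxes': [1]}, {'boxes': [1, 2]})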
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 4
model = get_object_detection_model(num_classes)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

import time
!pip install torchmetrics
from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision()

def train_model(model, data_loader=None, num_epoch=10):
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")
        time_start = time.time()
        loss_accum = 0.0
        # loss_mask_accum = 0.0
        loss_classifier_accum = 0.0
        model.train()
        for batch_idx, (images, targets) in enumerate(data_loader, 1):
            # Predict
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Logging
            # loss_mask = loss_dict['loss_mask'].item()
            loss_accum += loss.item()
            # loss_mask_accum += loss_mask
            loss_classifier_accum += loss_dict['loss_classifier'].item()
            if batch_idx % 500 == 0:
                print(f"    [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")
        lr_scheduler.step()
        # Train losses
        train_loss = loss_accum / n_batches
        # train_loss_mask = loss_mask_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches
        elapsed = time.time() - time_start
        torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin")
        prefix = f"[Epoch {epoch:2d} / {num_epoch:2d}]"
        # print(f"{prefix} Train mask-only loss: {train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}")
        print(f"{prefix} Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')
        # Validation
        preds_single = []
        targets_single = []
        for batch_idx, (images, targets) in enumerate(val_loader, 1):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            targets_single.extend(targets)
            model.eval()
            with torch.no_grad():
                pred = model(images)
            preds_single.extend(pred)
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")
    return model

def train_model(model, train_loader, num_epochs):
    # A generic training-loop skeleton; compute_loss is a placeholder and does
    # not apply as-is to torchvision detection models, which return a loss dict
    for epoch in range(num_epochs):
        # Training steps
        for batch_idx, (data, target) in enumerate(train_loader):
            # Forward pass
            output = model(data)
            # Compute loss
            loss = compute_loss(output, target)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Print training progress
            if batch_idx % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, batch_idx + 1, len(train_loader), loss.item()))
    return model

!pip install torchmetrics
import time
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision()

def train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'):
    n_batches = len(train_loader)
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")
        time_start = time.time()
        loss_accum = 0.0
        loss_classifier_accum = 0.0
        model.train()
        for batch_idx, (images, targets) in enumerate(train_loader, 1):
            # Move data to device
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            # Forward pass
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Logging
            loss_accum += loss.item()
            loss_classifier_accum += loss_dict['loss_classifier'].item()
            if batch_idx % 500 == 0:
                print(f"    [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")
        lr_scheduler.step()
        # Compute average training loss
        train_loss = loss_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches
        elapsed = time.time() - time_start
        print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')
        # Validation
        preds_single = []
        targets_single = []
        model.eval()
        with torch.no_grad():
            for batch_idx, (images, targets) in enumerate(val_loader, 1):
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                targets_single.extend(targets)
                pred = model(images)
                preds_single.extend(pred)
        # Update metric and compute mAP
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")
    return model

# Instantiate the model
num_classes = 4  # modify based on your dataset
model = get_object_detection_model(num_classes)
model.to(device)

# Define optimizer and scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Provide DataLoaders for training and validation
train_loader = torch.utils.data.DataLoader(train_data, batch_size=5, shuffle=True,
                                           num_workers=2, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True,
                                         num_workers=2, collate_fn=collate_fn)

# Train the model
num_epoch = 30  # modify as needed
model = train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)
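One caveat worth flagging: torchmetrics' MeanAveragePrecision accumulates state across update() calls, so reusing the same metric object over epochs without a reset() mixes predictions from every epoch into each reported mAP. A minimal sketch with dummy boxes (assuming torchmetrics is installed as above):

import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()
preds = [{'boxes': torch.tensor([[10., 10., 50., 50.]]),
          'scores': torch.tensor([0.9]),
          'labels': torch.tensor([1])}]
targets = [{'boxes': torch.tensor([[10., 10., 50., 50.]]),
            'labels': torch.tensor([1])}]
metric.update(preds, targets)
print(metric.compute()['map'])  # tensor(1.) for a perfect detection
metric.reset()                  # clear state so the next epoch starts fresh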
images, targets = next(iter(test_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])

import torchvision

def apply_threshold(prediction, threshold=0.6):
    # Here `threshold` is the IoU threshold for non-maximum suppression:
    # boxes overlapping a higher-scoring box by more than this IoU are dropped
    index = torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold)
    pred = prediction.copy()
    pred['boxes'] = pred['boxes'][index]
    pred['scores'] = pred['scores'][index]
    pred['labels'] = pred['labels'][index]
    return pred

def apply_threshold_batch(prediction, threshold=0.1):
    preds = []
    for data in prediction:
        preds.append(apply_threshold(data, threshold))
    return preds

model.to(device)
model.eval()
with torch.no_grad():
    prediction = model([images[0].to(device)])
print(prediction)

filtered_preds = apply_threshold_batch(prediction, threshold=0.1)
filtered_preds

import torch

def apply_threshold_batch(predictions, threshold=0.5):
    # This variant filters by confidence score rather than by NMS IoU
    filtered_preds = []
    for pred in predictions:
        boxes = pred['boxes']
        labels = pred['labels']
        scores = pred['scores']
        # Apply thresholding
        mask = scores > threshold
        filtered_boxes = boxes[mask]
        filtered_labels = labels[mask]
        filtered_scores = scores[mask]
        # Construct filtered prediction dictionary
        filtered_pred = {'boxes': filtered_boxes,
                         'labels': filtered_labels,
                         'scores': filtered_scores}
        filtered_preds.append(filtered_pred)
    return filtered_preds

# Example prediction
prediction = [{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]),
               'labels': torch.tensor([1, 2]),
               'scores': torch.tensor([0.8, 0.6])}]

# Apply threshold: only the first box (score 0.8) survives the 0.7 cutoff
filtered_preds = apply_threshold_batch(prediction, threshold=0.7)
print(filtered_preds)
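The two post-processing steps above solve different problems: the score threshold drops low-confidence detections, while NMS removes near-duplicate boxes of the same object. A sketch of one way to chain them (the helper name and thresholds here are illustrative, not from the transcript):

import torch
import torchvision

def postprocess(pred, score_thresh=0.5, iou_thresh=0.5):
    # 1) keep only confident detections
    keep = pred['scores'] > score_thresh
    boxes, scores, labels = pred['boxes'][keep], pred['scores'][keep], pred['labels'][keep]
    # 2) suppress overlapping duplicates among the survivors
    idx = torchvision.ops.nms(boxes, scores, iou_thresh)
    return {'boxes': boxes[idx], 'scores': scores[idx], 'labels': labels[idx]}

pred = {'boxes': torch.tensor([[100., 100., 200., 200.],
                               [105., 105., 205., 205.],
                               [300., 300., 400., 400.]]),
        'scores': torch.tensor([0.9, 0.8, 0.3]),
        'labels': torch.tensor([1, 1, 2])}
print(postprocess(pred))  # one of the two overlapping boxes survives; the 0.3 box is dropped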
plot_img_bbox(images[2], filtered_preds[0])

Based on this, explain what we are doing in this project and the concept behind it.
ChatGPT
In this project, we're implementing an object detection system using PyTorch. The goal is to
detect and classify fruits in images, which involves identifying the presence of fruit instances
and labeling them with their corresponding classes (e.g., apple, banana, orange).
1. Dataset Preparation: We're using the Fruit Images Dataset, which contains images
of various fruits along with annotations specifying the bounding boxes and class
labels of the fruits in the images. We preprocess the dataset by loading images,
resizing them, and extracting bounding box annotations.
2. Model Selection: We're using the Faster R-CNN architecture, a popular choice for
object detection tasks. This architecture consists of a backbone network (ResNet50), a
Region Proposal Network (RPN), and a Region of Interest (ROI) Pooling layer,
among other components.
3. Training Setup: We split the dataset into training, validation, and test sets. We define
data loaders for efficient training and validation. The model is initialized with pre-
trained weights and optimized using the SGD optimizer with momentum. We also set
up a learning rate scheduler to adjust the learning rate during training.
4. Training Process: We train the model using an iterative training loop. In each
iteration, we perform a forward pass to compute the loss, followed by
backpropagation and optimization. We monitor the training loss and validation mean
Average Precision (mAP) as evaluation metrics.
5. Results Analysis: We analyze the results by visualizing sample predictions on test
images. We discuss the model's performance and potential improvements.
6. Thresholding and Non-Maximum Suppression: We apply a confidence threshold to
filter out detections with low scores and use Non-Maximum Suppression (NMS) as
post-processing to remove redundant bounding boxes. Both NMS and mAP rest on
the Intersection-over-Union (IoU) overlap measure, sketched below.
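As promised in step 6, here is the IoU idea in miniature: the area two boxes share, divided by the area they cover together. A minimal sketch, reusing the two example boxes from the prediction example above:

def iou(box_a, box_b):
    # boxes are [xmin, ymin, xmax, ymax]
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)

print(iou([100, 100, 200, 200], [150, 150, 250, 250]))  # ~0.143: low overlap, so NMS would keep both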