0% found this document useful (0 votes)

24 views31 pages

Presentation

look at this

Uploaded by

Shubhanka A

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

24 views31 pages

Presentation

look at this

Uploaded by

Shubhanka A

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 31

# Fruit detection notebook (Faster R-CNN on Pascal VOC style annotations).
# Restored from a whitespace-collapsed paste: statement order is unchanged,
# "# --- cell ---" comments mark where the paste appears to start a new
# notebook cell, and definitions that are later redefined are kept in place
# and labelled as superseded.

# --- cell: imports ---
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from xml.etree import ElementTree as et
import torch
import torchvision
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# IPython magic, not valid in a plain .py file; uncomment inside a notebook.
# %matplotlib inline

print(torch.__version__)


# --- cell: dataset ---
class FruitImagesDataset(torch.utils.data.Dataset):
    """Images plus bounding-box targets read from one directory.

    Each ``<name>.jpg`` in ``files_dir`` is paired with ``<name>.xml`` in the
    same directory. Images are resized to ``width`` x ``height`` and the box
    coordinates are rescaled to match.
    """

    def __init__(self, files_dir, width, height, transforms=None):
        """Store the settings and collect the sorted ``.jpg`` file names.

        Args:
            files_dir: Directory holding the images and XML annotations.
            width: Target image width in pixels.
            height: Target image height in pixels.
            transforms: Optional callable invoked as
                ``transforms(image=..., bboxes=..., labels=...)``.
        """
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width
        self.imgs = [image for image in sorted(os.listdir(files_dir))
                     if image[-4:] == '.jpg']
        # Index 0 is the background slot. The paste used the bare name `_`
        # here, which only resolves inside an IPython session.
        self.classes = ['__background__', 'apple', 'banana', 'orange']

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``.

        ``target`` is a dict with the keys ``boxes``, ``labels``, ``area``,
        ``iscrowd`` and ``image_id``.
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)

        img = cv2.imread(image_path)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The third positional parameter of cv2.resize is `dst`, so the
        # interpolation flag has to be passed by keyword to take effect.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        annot_filename = img_name[:-4] + '.xml'
        annot_file_path = os.path.join(self.files_dir, annot_filename)

        boxes = []
        labels = []
        tree = et.parse(annot_file_path)
        root = tree.getroot()

        # Original image size, used to rescale boxes to the resized image.
        wt = img.shape[1]
        ht = img.shape[0]

        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            xmin = int(member.find('bndbox').find('xmin').text)
            xmax = int(member.find('bndbox').find('xmax').text)
            ymin = int(member.find('bndbox').find('ymin').text)
            ymax = int(member.find('bndbox').find('ymax').text)

            xmin_corr = (xmin / wt) * self.width
            xmax_corr = (xmax / wt) * self.width
            ymin_corr = (ymin / ht) * self.height
            ymax_corr = (ymax / ht) * self.height

            boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr])

        # reshape keeps the tensor two-dimensional when an annotation file
        # lists no objects, so the column indexing below still works.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        image_id = torch.tensor([idx])
        target["image_id"] = image_id

        if self.transforms:
            sample = self.transforms(image=img_res,
                                     bboxes=target['boxes'],
                                     labels=labels)
            img_res = sample['image']
            target['boxes'] = torch.Tensor(sample['bboxes'])

        return img_res, target

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``files_dir``."""
        return len(self.imgs)


# --- cell: plotting helper ---
import matplotlib.patches as patches


def plot_img_bbox(img, target):
    """Show ``img`` with every box in ``target['boxes']`` outlined in red.

    Boxes are read as ``[xmin, ymin, xmax, ymax]``. A tensor image is first
    converted to a PIL RGB image.
    """
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    if isinstance(img, torch.Tensor):
        # `transforms` is imported from torchvision further down, before the
        # first call to this function.
        img = transforms.ToPILImage()(img).convert('RGB')
    a.imshow(img)
    boxes = target['boxes']
    if isinstance(boxes, torch.Tensor):
        boxes = boxes.tolist()
    for box in (boxes):
        x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
        rect = patches.Rectangle((x, y), width, height,
                                 linewidth=2, edgecolor='r', facecolor='none')
        a.add_patch(rect)
    plt.show()


# --- cell: dataset paths ---
files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'

dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

# --- cell: dataset paths (the paste repeats this cell verbatim) ---
files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'

dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

# --- cell: model and transforms ---
from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision import transforms


def get_object_detection_model(num_classes):
    """Return a pretrained Faster R-CNN whose box predictor has ``num_classes`` outputs."""
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


def collate_fn(batch):
    """Regroup a list of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
    return tuple(zip(*batch))


def get_transform(train):
    """Return the albumentations pipeline; random flips are added when ``train`` is true."""
    bbox_params = {'format': 'pascal_voc', 'label_fields': ['labels']}
    if train:
        # Probabilities are passed by keyword. NOTE(review): passed
        # positionally they can bind to a different parameter in some
        # albumentations releases - confirm against the installed version.
        return A.Compose(
            [
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.3),
                ToTensorV2(p=1.0),
            ],
            bbox_params=bbox_params,
        )
    else:
        return A.Compose(
            [
                ToTensorV2(p=1.0),
            ],
            bbox_params=bbox_params,
        )


# --- cell: datasets, split and loaders ---
dataset = FruitImagesDataset(files_dir, 224, 224,
                             transforms=get_transform(train=True))
dataset_test = FruitImagesDataset(test_dir, 224, 224,
                                  transforms=get_transform(train=False))

torch.manual_seed(1)
np.random.seed(1)
indices = np.random.permutation(len(dataset)).tolist()

# Hold out the last 10% of the shuffled training indices for validation.
test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)

train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:])

train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=5, shuffle=True, num_workers=2,
    collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=5, shuffle=True, num_workers=2,
    collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(
    dataset_test, batch_size=5, shuffle=False, num_workers=2,
    collate_fn=collate_fn)

n_batches, n_batches_test = len(train_loader), len(test_loader)

images, targets = next(iter(train_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])

# --- cell: model, optimizer, scheduler ---
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 4
model = get_object_detection_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# --- cell: metric ---
import time

# Shell escape, not valid in a plain .py file; run it in a notebook cell.
# !pip install torchmetrics
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()


# --- cell: training loop, first version (superseded by later definitions) ---
def train_model(model, data_loader=None, num_epoch=10):
    """Train for ``num_epoch`` epochs, saving a checkpoint and printing validation mAP each epoch.

    Reads the module-level ``device``, ``optimizer``, ``lr_scheduler``,
    ``n_batches``, ``val_loader`` and ``metric``.
    """
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")

        time_start = time.time()
        loss_accum = 0.0
        # loss_mask_accum = 0.0
        loss_classifier_accum = 0.0

        model.train()
        for batch_idx, (images, targets) in enumerate(data_loader, 1):
            # Predict
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            # loss_mask = loss_dict['loss_mask'].item()
            loss_accum += loss.item()
            # loss_mask_accum += loss_mask
            loss_classifier_accum += loss_dict['loss_classifier'].item()

            if batch_idx % 500 == 0:
                print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")

        lr_scheduler.step()

        # Train losses
        train_loss = loss_accum / n_batches
        # train_loss_mask = loss_mask_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches

        elapsed = time.time() - time_start

        torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin")
        prefix = f"[Epoch {epoch:2d} / {num_epoch:2d}]"
        # print(prefix)
        # print(f"{prefix} Train mask-only loss: {train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}")
        print(f"{prefix} Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

        preds_single = []
        targets_single = []

        for batch_idx, (images, targets) in enumerate(val_loader, 1):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            targets_single.extend(targets)

            model.eval()
            with torch.no_grad():
                pred = model(images)

            preds_single.extend(pred)

        # Clear the state left by earlier epochs so the printed mAP covers
        # only this epoch's predictions.
        metric.reset()
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")

    return model


# --- cell: training loop, generic template (superseded by the next definition) ---
def train_model(model, train_loader, num_epochs):
    """Generic loop template; ``compute_loss`` is not defined anywhere in this script."""
    # Define your training loop here
    for epoch in range(num_epochs):
        # Training steps
        for batch_idx, (data, target) in enumerate(train_loader):
            # Forward pass
            output = model(data)

            # Compute loss
            loss = compute_loss(output, target)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print training progress
            if batch_idx % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, batch_idx + 1, len(train_loader), loss.item()))

    return model


# --- cell: training loop, final version ---
# Plain `pip install torchmetrics` is a shell command, not Python.
# pip install torchmetrics
import time
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()


def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                num_epoch=10, device='cuda'):
    """Train ``model`` and print the validation mAP after every epoch.

    Args:
        model: Detection model; in train mode it is called as
            ``model(images, targets)`` and must return a dict of losses
            that includes ``'loss_classifier'``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per epoch.
        train_loader: Iterable of ``(images, targets)`` training batches.
        val_loader: Iterable of ``(images, targets)`` validation batches.
        num_epoch: Number of epochs to run.
        device: Device the images and target tensors are moved to.

    Returns:
        The same ``model`` object after training.
    """
    n_batches = len(train_loader)

    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")

        time_start = time.time()
        loss_accum = 0.0
        loss_classifier_accum = 0.0

        model.train()
        for batch_idx, (images, targets) in enumerate(train_loader, 1):
            # Move data to device
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            loss_accum += loss.item()
            loss_classifier_accum += loss_dict['loss_classifier'].item()

            if batch_idx % 500 == 0:
                print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")

        lr_scheduler.step()

        # Compute average training loss
        train_loss = loss_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches

        elapsed = time.time() - time_start

        print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

        # Validation
        preds_single = []
        targets_single = []

        model.eval()
        with torch.no_grad():
            for batch_idx, (images, targets) in enumerate(val_loader, 1):
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                targets_single.extend(targets)

                pred = model(images)
                preds_single.extend(pred)

        # Update metric and compute mAP. The module-level metric is reset
        # first so results from earlier epochs are not mixed in.
        metric.reset()
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")

    return model


# --- cell: run training ---
# Instantiate your model
num_classes = 4  # Modify based on your dataset
model = get_object_detection_model(num_classes)
model.to(device)

# Define optimizer and scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Provide DataLoader for training and validation
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=5, shuffle=True, num_workers=2,
                                           collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2,
                                         collate_fn=collate_fn)

# Train the model
num_epoch = 30  # Modify as needed
model = train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)

# --- cell: inspect one test batch ---
images, targets = next(iter(test_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])

# --- cell: NMS-based filtering ---
import torchvision


def apply_threshold(prediction, threshold=0.6):
    """Return a shallow copy of ``prediction`` keeping only the boxes chosen by NMS.

    ``threshold`` is handed to ``torchvision.ops.nms`` as its third argument.
    """
    index = torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold)
    pred = prediction.copy()
    pred['boxes'] = pred['boxes'][index]
    pred['scores'] = pred['scores'][index]
    pred['labels'] = pred['labels'][index]
    return pred


def apply_threshold_batch(prediction, threshold=0.1):
    """Apply ``apply_threshold`` to every prediction dict in ``prediction``."""
    preds = []
    for data in prediction:
        preds.append(apply_threshold(data, threshold))
    return preds


# --- cell: predict on one test image ---
model.to(device)
model.eval()
with torch.no_grad():
    prediction = model([images[0].to(device)])
print(prediction)

filtered_preds = apply_threshold_batch(prediction, threshold=0.1)
filtered_preds  # bare expression: shown as cell output in a notebook only

# --- cell: score-based filtering (redefines apply_threshold_batch) ---
import torch


def apply_threshold_batch(predictions, threshold=0.5):
    """Keep only detections whose score is strictly greater than ``threshold``.

    Returns a new list with one ``{'boxes', 'labels', 'scores'}`` dict per
    input prediction.
    """
    filtered_preds = []
    for pred in predictions:
        boxes = pred['boxes']
        labels = pred['labels']
        scores = pred['scores']

        # Apply thresholding
        mask = scores > threshold
        filtered_boxes = boxes[mask]
        filtered_labels = labels[mask]
        filtered_scores = scores[mask]

        # Construct filtered prediction dictionary
        filtered_pred = {
            'boxes': filtered_boxes,
            'labels': filtered_labels,
            'scores': filtered_scores
        }
        filtered_preds.append(filtered_pred)
    return filtered_preds


# Example prediction
prediction = [{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]),
               'labels': torch.tensor([1, 2]),
               'scores': torch.tensor([0.8, 0.6])}]

# Apply threshold
filtered_preds = apply_threshold_batch(prediction, threshold=0.7)
print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0]) import torchvision def
apply_threshold(prediction, threshold=0.6): index = torchvision.ops.nms(prediction['boxes'],
prediction['scores'], threshold) pred = prediction.copy() pred['boxes'] = pred['boxes'][index]
pred['scores'] = pred['scores'][index] pred['labels'] = pred['labels'][index] return pred def
apply_threshold_batch(prediction, threshold=0.1): preds = [] for data in prediction:
preds.append(apply_threshold(data, threshold)) return preds

Prepare a complete PPT presentation for this, with full information.
ChatGPT

Certainly! Below is a comprehensive PowerPoint presentation outline based on the given

code and the training and evaluation of the Fruit Object Detection Model.

Slide 1: Title Slide

Title: Fruit Object Detection Model

Subtitle: Using PyTorch and Faster R-CNN

Presenter: [Your Name]

Date: [Date]

Slide 2: Introduction
Objectives

 Objective: To build and train an object detection model for fruits using Faster R-
CNN.
 Dataset: Contains images of apples, bananas, and oranges.

Slide 3: Dataset Preparation

Steps

 Data Directory: Contains images and corresponding annotation files.

 Annotations: Stored in XML format following Pascal VOC structure.
 Classes: apple, banana, orange.

Slide 4: Dataset Class

Code Overview
python
Copy code
class FruitImagesDataset(torch.utils.data.Dataset):
    """Slide excerpt of the dataset class; every method body is elided."""

    def __init__(self, files_dir, width, height, transforms=None):
        # Body omitted on the slide.
        ...

    def __getitem__(self, idx):
        # Body omitted on the slide.
        ...

    def __len__(self):
        # Body omitted on the slide.
        ...

Key Functions

 __init__: Initializes the dataset.

 __getitem__: Retrieves and preprocesses images and annotations.
 __len__: Returns the total number of images.

Slide 5: Data Transformation and Augmentation

Albumentations Library

 Training Transformations: Horizontal and vertical flips.

 Validation Transformations: Conversion to tensor only.

Code Snippet
python
Copy code
def get_transform(train):
    """Slide excerpt: return the albumentations pipeline for training or evaluation.

    The transform lists are abbreviated as ``[...]`` on the slide.
    """
    if train:
        return A.Compose([...], bbox_params={'format': 'pascal_voc',
                                             'label_fields': ['labels']})
    else:
        return A.Compose([...], bbox_params={'format': 'pascal_voc',
                                             'label_fields': ['labels']})

Slide 6: Data Loading

Data Loaders

 Training Data: Split into training and validation sets.

 Validation Data: The 10% of the training set held out by the split; the separate test directory feeds the test loader.
 Batch Size: 5

Code Snippet
python
Copy code
# Slide excerpt: `...` stands for the dataset and loader options left out here.
train_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)

Slide 7: Model Definition

Faster R-CNN Model

 Pre-trained: Uses ResNet50 with Feature Pyramid Network (FPN).

 Customization: Adjusts the classifier head for the number of fruit classes.

Code Snippet
python
Copy code
def get_object_detection_model(num_classes):
    """Return a pretrained Faster R-CNN whose box predictor has ``num_classes`` outputs."""
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    # Size the replacement head from the existing classifier's input features.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features,
                                                      num_classes)
    return model

Slide 8: Training Loop

Training Procedure

 Epochs: Iterate over the dataset.

 Loss Calculation: Sum of all losses from the model.
 Optimizer: Stochastic Gradient Descent (SGD).

Code Snippet
python
Copy code
def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                num_epoch=10, device='cuda'):
    """Slide excerpt of the training entry point; the per-epoch work is elided.

    Returns the ``model`` object that was passed in.
    """
    for epoch in range(1, num_epoch + 1):
        # Per-epoch body omitted on the slide.
        ...
    return model

Slide 9: Evaluation Metrics

Mean Average Precision (mAP)

 Metric: Used to evaluate object detection models.

 Implementation: Uses torchmetrics.

Code Snippet
python
Copy code
# A bare `import torchmetrics` does not define the name MeanAveragePrecision,
# so the class is imported from its module directly.
from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision()
Slide 10: Training Results
Loss and mAP

 Loss: Training loss over epochs.

 mAP: Validation mean Average Precision.

Example Output
plaintext
Copy code
[Epoch 10 / 30] Train loss: 0.123 [300 secs] | Val mAP: 0.78

Slide 11: Visualization

Plotting Bounding Boxes

 Function: plot_img_bbox
 Visualization: Shows bounding boxes on images.

Code Snippet
python
Copy code
def plot_img_bbox(img, target):
    """Slide excerpt of the bounding-box plotting helper; the body is elided."""
    ...

Slide 12: Applying Thresholds

Filtering Predictions

 Non-Maximum Suppression (NMS): Filters overlapping bounding boxes based on

scores.

Code Snippet
python
Copy code
def apply_threshold(prediction, threshold=0.6):
    """Slide excerpt: filter a single prediction; the body is elided."""
    ...


def apply_threshold_batch(prediction, threshold=0.1):
    """Slide excerpt: filter a list of predictions; the body is elided."""
    ...

Slide 13: Example Predictions

Sample Output
 Original Predictions: Before applying threshold.
 Filtered Predictions: After applying threshold.

Visualization

 Shows images with bounding boxes drawn around detected objects.

Slide 14: Conclusion

Summary

 Successfully trained an object detection model for fruits.

 Evaluated the model using mAP.
 Demonstrated the effectiveness of applying thresholds.

Next Steps

 Further fine-tuning of the model.

 Exploring more data augmentation techniques.

Slide 15: Q&A

Questions and Answers

 Open floor for questions from the audience.

Feel free to adjust and add more details as necessary to fit the presentation needs.

4o
please expand the bullet points for all slides
2/2
ChatGPT

Certainly! Here's an expanded version of the PowerPoint presentation with detailed bullet
points for each slide.

Slide 1: Title Slide

Title: Fruit Object Detection Model
Subtitle: Using PyTorch and Faster R-CNN

Presenter: [Your Name]

Date: [Date]

Slide 2: Introduction
Objectives

 Objective:
o Develop a robust object detection model to identify and classify fruits.
o Utilize the Faster R-CNN framework for detection tasks.
 Dataset:
o A collection of images containing apples, bananas, and oranges.
o Each image has corresponding annotation files marking the bounding boxes
and labels for the fruits.

Slide 3: Dataset Preparation

Steps

 Data Directory:
o Organize images and annotations within a specific directory structure.
o Images are stored in JPG format.
 Annotations:
o Annotations provided in XML format.
o XML files adhere to the Pascal VOC structure, containing details about
bounding boxes and labels.
 Classes:
o Defined classes: apple, banana, orange.
o The class index mapping: {0: background, 1: apple, 2: banana, 3: orange}.

Slide 4: Dataset Class

Key Functions

 __init__:
o Initialize the dataset with the image directory, target image dimensions, and
transformations.
o Load image filenames and define class labels.
 __getitem__:
o Load and preprocess image and corresponding annotation for a given index.
o Convert bounding box coordinates to match the target image dimensions.
 __len__:
o Return the total number of images in the dataset.

Slide 5: Data Transformation and Augmentation

Albumentations Library

 Training Transformations:
o Apply random horizontal and vertical flips to augment the dataset.
o Convert images to tensor format with ToTensorV2 (pixel values are already scaled to [0, 1] in the dataset's __getitem__).
 Validation Transformations:
o Convert images to tensor format with ToTensorV2 only, without augmentation (pixel values are already scaled to [0, 1] in the dataset's __getitem__).

Slide 6: Data Loading

Data Loaders

 Training Data:
o Split the dataset into training and validation subsets using a specified ratio.
 Validation Data:
o The 10% of the training set held out by the split, used to compute mAP after each epoch; the separate test directory is served by the test loader.
 Batch Size:
o Set batch size to 5 for both training and validation loaders.

Slide 7: Model Definition

Faster R-CNN Model

 Pre-trained:
o Initialize Faster R-CNN with a ResNet50 backbone pre-trained on COCO
dataset.
 Customization:
o Modify the classifier head to match the number of classes in our fruit dataset.

Slide 8: Training Loop

Training Procedure

 Epochs:
o Train the model for a specified number of epochs.
 Loss Calculation:
o Compute the total loss by summing individual loss components provided by
the model.
 Optimizer:
o Use Stochastic Gradient Descent (SGD) with momentum and weight decay for
optimization.

Code Snippet
python
Copy code
def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                num_epoch=10, device='cuda'):
    """Slide excerpt of the training entry point; the per-epoch work is elided.

    Returns the ``model`` object that was passed in.
    """
    for epoch in range(1, num_epoch + 1):
        # Per-epoch body omitted on the slide.
        ...
    return model

Slide 9: Evaluation Metrics

Mean Average Precision (mAP)

 Metric:
o Mean Average Precision (mAP) is a common evaluation metric for object
detection models.
o Measures the accuracy of bounding box predictions and class labels.
 Implementation:
o Use the torchmetrics library to compute mAP during validation.

Code Snippet
python
Copy code
# A bare `import torchmetrics` does not define the name MeanAveragePrecision,
# so the class is imported from its module directly.
from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision()

Slide 10: Training Results

Loss and mAP

 Loss:
o Track and display the training loss over each epoch.
 mAP:
o Calculate and display the mean Average Precision on the validation set.

Example Output
plaintext
Copy code
[Epoch 10 / 30] Train loss: 0.123 [300 secs] | Val mAP: 0.78

Slide 11: Visualization

Plotting Bounding Boxes

 Function:
o Define a function plot_img_bbox to visualize bounding boxes on images.
 Visualization:
o Display images with bounding boxes drawn around detected objects.
Code Snippet
python
Copy code
def plot_img_bbox(img, target):
    """Slide excerpt of the bounding-box plotting helper; the body is elided."""
    ...

Slide 12: Applying Thresholds

Filtering Predictions

 Non-Maximum Suppression (NMS):

o Filter overlapping bounding boxes based on their confidence scores to retain
the best detections.

Code Snippet
python
Copy code
def apply_threshold(prediction, threshold=0.6):
    """Slide excerpt: filter a single prediction; the body is elided."""
    ...


def apply_threshold_batch(prediction, threshold=0.1):
    """Slide excerpt: filter a list of predictions; the body is elided."""
    ...

Slide 13: Example Predictions

Sample Output

 Original Predictions:
o Predictions before applying the thresholding and NMS.
 Filtered Predictions:
o Predictions after applying thresholding and NMS to filter out lower confidence
detections.

Visualization

 Show images with bounding boxes drawn around detected objects, highlighting the
difference between original and filtered predictions.

Slide 14: Conclusion

Summary

 Model Training:
o Successfully trained an object detection model for identifying apples, bananas,
and oranges.
 Evaluation:
o Evaluated the model using mAP, showing reasonable performance.
 Visualization:
o Demonstrated the effectiveness of the model through visual examples.

Next Steps

 Fine-Tuning:
o Further fine-tune the model to improve accuracy.
 Augmentation:
o Explore additional data augmentation techniques to enhance model robustness.

Slide 15: Q&A

Questions and Answers

 Open Floor:
o Invite the audience to ask questions and provide clarifications on any part of
the presentation.
 Discussion:
o Engage in discussions about potential improvements, use cases, and future
directions for the project.

Feel free to modify and add more specific details, code snippets, and visual aids such as
charts and graphs to enhance the presentation.

4o
mport os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch._version_) class FruitImagesDataset(torch.utils.data.Dataset): def _init_(self,
files_dir, width, height, transforms=None): self.transforms = transforms self.files_dir =
files_dir self.height = height self.width = width self.imgs = [image for image in
sorted(os.listdir(files_dir)) if image[-4:]=='.jpg'] self.classes = [_, 'apple','banana','orange'] def
_getitem_(self, idx): img_name = self.imgs[idx] image_path = os.path.join(self.files_dir,
img_name) img = cv2.imread(image_path) img_rgb = cv2.cvtColor(img,
cv2.COLOR_BGR2RGB).astype(np.float32) img_res = cv2.resize(img_rgb, (self.width,
self.height), cv2.INTER_AREA) img_res /= 255.0 annot_filename = img_name[:-4] + '.xml'
annot_file_path = os.path.join(self.files_dir, annot_filename) boxes = [] labels = [] tree =
et.parse(annot_file_path) root = tree.getroot() wt = img.shape[1] ht = img.shape[0] for
member in root.findall('object'): labels.append(self.classes.index(member.find('name').text))
xmin = int(member.find('bndbox').find('xmin').text) xmax =
int(member.find('bndbox').find('xmax').text) ymin =
int(member.find('bndbox').find('ymin').text) ymax =
int(member.find('bndbox').find('ymax').text) xmin_corr = (xmin/wt)*self.width xmax_corr =
(xmax/wt)*self.width ymin_corr = (ymin/ht)*self.height ymax_corr = (ymax/ht)*self.height
boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr]) boxes =
torch.as_tensor(boxes, dtype=torch.float32) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] -
boxes[:, 0]) iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64) labels =
torch.as_tensor(labels, dtype=torch.int64) target = {} target["boxes"] = boxes target["labels"]
= labels target["area"] = area target["iscrowd"] = iscrowd image_id = torch.tensor([idx])
target["image_id"] = image_id if self.transforms: sample = self.transforms(image = img_res,
bboxes = target['boxes'], labels = labels) img_res = sample['image'] target['boxes'] =
torch.Tensor(sample['bboxes']) return img_res, target def _len_(self): return len(self.imgs)
import matplotlib.patches as patches def plot_img_bbox(img, target): fig, a =
plt.subplots(1,1) fig.set_size_inches(5,5) if isinstance(img, torch.Tensor): img =
transforms.ToPILImage()(img).convert('RGB') a.imshow(img) boxes = target['boxes'] if
isinstance(boxes, torch.Tensor): boxes = boxes.tolist() for box in (boxes): x, y, width, height
= box[0], box[1], box[2]-box[0], box[3]-box[1] rect = patches.Rectangle((x, y), width, height,
linewidth = 2, edgecolor = 'r', facecolor = 'none') a.add_patch(rect) plt.show() files_dir =
'/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') files_dir
= '/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') from
torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn from
torchvision.models.detection.faster_rcnn import FastRCNNPredictor from
torchvision.models.detection.mask_rcnn import MaskRCNNPredictor from torchvision
import transforms def get_object_detection_model(num_classes): model =
fasterrcnn_resnet50_fpn(pretrained=True) in_features =
model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor =
FastRCNNPredictor(in_features, num_classes) return model def collate_fn(batch): return
tuple(zip(*batch)) def get_transform(train): if train: return
A.Compose([ A.HorizontalFlip(0.5), A.VerticalFlip(0.3), ToTensorV2(p=1.0) ],
bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) else: return
A.Compose([ ToTensorV2(p=1.0) ], bbox_params={'format': 'pascal_voc', 'label_fields':
['labels']}) dataset = FruitImagesDataset(files_dir, 224, 224,
transforms=get_transform(train=True)) dataset_test = FruitImagesDataset(test_dir, 224, 224,
transforms=get_transform(train=False)) torch.manual_seed(1) np.random.seed(1) indices =
np.random.permutation(len(dataset)).tolist() test_split = 0.1 tsize = len(dataset) -
int(len(dataset)*test_split) train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:]) train_loader =
torch.utils.data.DataLoader( train_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) val_loader = torch.utils.data.DataLoader( val_data, batch_size=5,
shuffle=True, num_workers=2, collate_fn=collate_fn) test_loader =
torch.utils.data.DataLoader( dataset_test, batch_size=5, shuffle=False, num_workers=2,
collate_fn=collate_fn) n_batches, n_batches_test = len(train_loader), len(test_loader) images,
targets = next(iter(train_loader)) print(targets[0]) plot_img_bbox(images[0], targets[0])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') num_classes
= 4 model = get_object_detection_model(num_classes) model.to(device) params = [p for p in
model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) import time !pip
install torchmetrics from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision() def train_model(model, data_loader=None,
num_epoch=10): for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 # loss_mask_accum = 0.0
loss_classifier_accum = 0.0 model.train() for batch_idx, (images, targets) in
enumerate(data_loader, 1): # Predict images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets] loss_dict = model(images,
targets) loss = sum(loss for loss in loss_dict.values()) # Backprop optimizer.zero_grad()
loss.backward() optimizer.step() # Logging # loss_mask = loss_dict['loss_mask'].item()
loss_accum += loss.item() # loss_mask_accum += loss_mask loss_classifier_accum +=
loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch {batch_idx:3d} /
{n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() # Train losses
train_loss = loss_accum / n_batches # train_loss_mask = loss_mask_accum / n_batches
train_loss_classifier = loss_classifier_accum / n_batches elapsed = time.time() - time_start
torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin") prefix = f"[Epoch {epoch:2d}
/ {num_epoch:2d}]" # print(prefix) # print(f"{prefix} Train mask-only loss:
{train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}") print(f"{prefix} Train
loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') preds_single = [] targets_single = [] for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) model.eval() with torch.no_grad(): pred = model(images)
preds_single.extend(pred) metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model def
train_model(model, train_loader, num_epochs): # Define your training loop here for epoch in
range(num_epochs): # Training steps for batch_idx, (data, target) in enumerate(train_loader):
# Forward pass output = model(data) # Compute loss loss = compute_loss(output, target) #
Backward pass and optimization optimizer.zero_grad() loss.backward() optimizer.step() #
Print training progress if batch_idx % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss:
{:.4f}' .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), loss.item())) return
model pip install torchmetrics import time import torch from torchmetrics.detection.mean_ap
import MeanAveragePrecision metric = MeanAveragePrecision() def train_model(model,
optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'): n_batches
= len(train_loader) for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 loss_classifier_accum = 0.0
model.train() for batch_idx, (images, targets) in enumerate(train_loader, 1): # Move data to
device images = list(image.to(device) for image in images) targets = [{k: v.to(device) for k, v
in t.items()} for t in targets] # Forward pass loss_dict = model(images, targets) loss =
sum(loss for loss in loss_dict.values()) # Backpropagation optimizer.zero_grad()
loss.backward() optimizer.step() # Logging loss_accum += loss.item() loss_classifier_accum
+= loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch
{batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() #
Compute average training loss train_loss = loss_accum / n_batches train_loss_classifier =
loss_classifier_accum / n_batches elapsed = time.time() - time_start print(f"[Epoch
{epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') #
Validation preds_single = [] targets_single = [] model.eval() with torch.no_grad(): for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) pred = model(images) preds_single.extend(pred) # Update
metric and compute mAP metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model # Instantiate your
model num_classes = 4 # Modify based on your dataset model =
get_object_detection_model(num_classes) model.to(device) # Define optimizer and
scheduler params = [p for p in model.parameters() if p.requires_grad] optimizer =
torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # Provide DataLoader
for training and validation train_loader = torch.utils.data.DataLoader(train_data,
batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn) val_loader =
torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) # Train the model num_epoch = 30 # Modify as needed model =
train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)
images, targets = next(iter(test_loader)) print(targets[0]) plot_img_bbox(images[0],
targets[0]) import torchvision def apply_threshold(prediction, threshold=0.6): index =
torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold) pred =
prediction.copy() pred['boxes'] = pred['boxes'][index] pred['scores'] = pred['scores'][index]
pred['labels'] = pred['labels'][index] return pred def apply_threshold_batch(prediction,
threshold=0.1): preds = [] for data in prediction: preds.append(apply_threshold(data,
threshold)) return preds model.to(device) model.eval() with torch.no_grad(): prediction =
model([images[0].to(device)]) print(prediction) filtered_preds =
apply_threshold_batch(prediction, threshold=0.1) filtered_preds import torch def
apply_threshold_batch(predictions, threshold=0.5): filtered_preds = [] for pred in predictions:
boxes = pred['boxes'] labels = pred['labels'] scores = pred['scores'] # Apply thresholding mask
= scores > threshold filtered_boxes = boxes[mask] filtered_labels = labels[mask]
filtered_scores = scores[mask] # Construct filtered prediction dictionary filtered_pred =
{ 'boxes': filtered_boxes, 'labels': filtered_labels, 'scores': filtered_scores }
filtered_preds.append(filtered_pred) return filtered_preds # Example prediction prediction =
[{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]), 'labels': torch.tensor([1,
2]), 'scores': torch.tensor([0.8, 0.6])}] # Apply threshold filtered_preds =
apply_threshold_batch(prediction, threshold=0.7) print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0]) import torchvision def
apply_threshold(prediction, threshold=0.6): index = torchvision.ops.nms(prediction['boxes'],
prediction['scores'], threshold) pred = prediction.copy() pred['boxes'] = pred['boxes'][index]
pred['scores'] = pred['scores'][index] pred['labels'] = pred['labels'][index] return pred def
apply_threshold_batch(prediction, threshold=0.1): preds = [] for data in prediction:
preds.append(apply_threshold(data, threshold)) return preds explain the topic through this
code
ChatGPT

Certainly! Here’s a detailed explanation of the provided code, covering the main aspects of
the fruit object detection project using PyTorch and Faster R-CNN:

Overview
This code implements a fruit object detection model using PyTorch and Faster R-CNN. The
primary goal is to detect and classify fruits (apples, bananas, and oranges) in images.

Import Libraries
First, essential libraries are imported:
python
Copy code
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from xml.etree import ElementTree as et
import torch
import torchvision
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
%matplotlib inline
print(torch.__version__)

• os: File handling.

• numpy: Array operations.
• matplotlib: Plotting images.
• cv2: Image processing.
• ElementTree: Parsing XML files.
• torch, torchvision: PyTorch and its vision library.
• albumentations: Data augmentation.
• ToTensorV2: Convert image to tensor.

Dataset Class
The FruitImagesDataset class handles loading and preprocessing of the dataset:

python
Copy code
class FruitImagesDataset(torch.utils.data.Dataset):
    """Detection dataset read from a directory of ``.jpg`` images.

    Every image ``name.jpg`` is expected to have an annotation file
    ``name.xml`` in the same directory containing ``object`` elements with a
    ``name`` and a ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``).

    Args:
        files_dir: Directory holding the images and their XML annotations.
        width: Width every image is resized to.
        height: Height every image is resized to.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with ``'image'`` and ``'bboxes'`` entries.
    """

    def __init__(self, files_dir, width, height, transforms=None):
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width
        # Sorted so that a given index always refers to the same image.
        self.imgs = [image for image in sorted(os.listdir(files_dir))
                     if image.endswith('.jpg')]
        # Entry 0 is a placeholder so the fruit classes start at label 1
        # (presumably the background slot of the detection model -- confirm).
        self.classes = ['_', 'apple', 'banana', 'orange']

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``.

        The image is converted to RGB float32, resized to
        ``(self.width, self.height)`` and scaled to ``[0, 1]``.  ``target`` is
        a dict with ``boxes`` (rescaled to the resized image, shape
        ``(num_objects, 4)``), ``labels``, ``area``, ``iscrowd`` and
        ``image_id``.

        Raises:
            OSError: If OpenCV cannot read the image file.
            ValueError: If an annotated class name is not in ``self.classes``.
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"cv2.imread could not read image: {image_path}")
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is ``dst``.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        annot_filename = img_name[:-4] + '.xml'
        annot_file_path = os.path.join(self.files_dir, annot_filename)
        boxes = []
        labels = []
        tree = et.parse(annot_file_path)
        root = tree.getroot()

        # Original image size, used to rescale boxes to the resized image.
        wt = img.shape[1]
        ht = img.shape[0]
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))
            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)
            xmin_corr = (xmin / wt) * self.width
            xmax_corr = (xmax / wt) * self.width
            ymin_corr = (ymin / ht) * self.height
            ymax_corr = (ymax / ht) * self.height
            boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr])

        # reshape keeps the tensor two-dimensional even when the annotation
        # lists no objects, so the column indexing below stays valid.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        image_id = torch.tensor([idx])
        target["image_id"] = image_id

        if self.transforms:
            sample = self.transforms(image=img_res, bboxes=target['boxes'],
                                     labels=labels)
            img_res = sample['image']
            target['boxes'] = torch.as_tensor(
                sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

        return img_res, target

    # Backward-compatible alias for callers that used the old method name.
    getitem = __getitem__

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``files_dir``."""
        return len(self.imgs)

Methods:

• __init__: Initializes the dataset with image directory, target dimensions, and
transformations.
• __getitem__: Loads and preprocesses an image and its annotations for a given index.
• __len__: Returns the total number of images in the dataset.

Data Visualization
A function to plot images with bounding boxes:

python
Copy code
import matplotlib.patches as patches

def plot_img_bbox(img, target):
    """Display ``img`` with each box from ``target['boxes']`` outlined in red.

    Args:
        img: Image to show.  A ``torch.Tensor`` is first converted to an RGB
            PIL image; any other value is handed to ``imshow`` unchanged.
        target: Mapping whose ``'boxes'`` entry is a tensor or sequence of
            ``[xmin, ymin, xmax, ymax]`` boxes.
    """
    figure, axis = plt.subplots(1, 1)
    figure.set_size_inches(5, 5)

    if isinstance(img, torch.Tensor):
        to_pil = transforms.ToPILImage()
        img = to_pil(img).convert('RGB')
    axis.imshow(img)

    box_list = target['boxes']
    if isinstance(box_list, torch.Tensor):
        box_list = box_list.tolist()

    for box in box_list:
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        # Rectangle takes an anchor corner plus width/height, not two corners.
        outline = patches.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            linewidth=2,
            edgecolor='r',
            facecolor='none',
        )
        axis.add_patch(outline)

    plt.show()

Dataset Initialization
python
Copy code
# Locations of the training and test images (with their XML annotations).
files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'

# Training dataset with every image resized to 224x224; no transforms yet.
dataset = FruitImagesDataset(files_dir, width=224, height=224)
print('length of dataset = ', len(dataset), '\n')

• Defines the training and test directories and initializes the training dataset.

Faster R-CNN Model

Function to get a Faster R-CNN model with the desired number of classes:

python
Copy code
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_object_detection_model(num_classes):
    """Build a pretrained Faster R-CNN whose box head predicts ``num_classes``.

    The stock box predictor is swapped for a fresh ``FastRCNNPredictor`` that
    takes the same number of input features but outputs ``num_classes``
    classes.

    Args:
        num_classes: Number of output classes for the new box predictor.

    Returns:
        The modified detection model.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=True)
    # Reuse the input width of the existing classification layer.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(
        head_in_features, num_classes
    )
    return detector

Data Augmentation and Transformation

Transforms for training and validation datasets:

python
Copy code
def get_transform(train):
    """Return the albumentations pipeline for training or evaluation.

    Args:
        train: When truthy, random horizontal (p=0.5) and vertical (p=0.3)
            flips are applied before tensor conversion; otherwise the
            pipeline only converts the image to a tensor.

    Returns:
        An ``A.Compose`` that expects ``image``, ``bboxes`` (Pascal-VOC
        format) and ``labels`` keyword arguments.
    """
    bbox_params = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = []
    if train:
        # Probabilities are passed by keyword: on albumentations releases
        # where the first positional parameter is ``always_apply``, a
        # positional 0.5 is truthy and makes the flip fire on every sample.
        steps.append(A.HorizontalFlip(p=0.5))
        steps.append(A.VerticalFlip(p=0.3))
    steps.append(ToTensorV2(p=1.0))
    return A.Compose(steps, bbox_params=bbox_params)

Data Loader
Creating data loaders for training and validation:

python
Copy code
# Fix both RNGs so the train/validation split is reproducible.
torch.manual_seed(1)
np.random.seed(1)

# Shuffle the dataset indices once and hold out the last 10% for validation.
indices = np.random.permutation(len(dataset)).tolist()
test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)
train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:])

# Settings shared by all three loaders.
loader_opts = dict(batch_size=5, num_workers=2, collate_fn=collate_fn)

train_loader = torch.utils.data.DataLoader(
    train_data, shuffle=True, **loader_opts
)
val_loader = torch.utils.data.DataLoader(
    val_data, shuffle=True, **loader_opts
)
test_loader = torch.utils.data.DataLoader(
    dataset_test, shuffle=False, **loader_opts
)

Training the Model

Training loop for the model:

python
Copy code
import time
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()

def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,

num_epoch=10, device='cuda'):
n_batches = len(train_loader)
for epoch in range(1, num_epoch + 1):
print(f"Starting epoch {epoch} of {num_epoch}")
time_start = time.time()
loss_accum = 0.0
loss_classifier_accum = 0.0
model.train()

for batch_idx, (images, targets) in enumerate(train_loader, 1):

images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in
targets]
loss_dict = model(images, targets)
loss = sum(loss for loss in loss_dict.values())
optimizer.zero_grad()
loss.backward()
optimizer.step()
loss_accum += loss.item()
loss_classifier_accum += loss_dict['loss_classifier'].item()

if batch_idx % 500 == 0:
print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch
train loss: {loss.item():7.3f}.")

lr_scheduler.step()
train_loss = loss_accum / n_batches
train_loss_classifier = loss_classifier_accum / n_batches
elapsed = time.time() - time_start
print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss:
{train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

preds_single = []
targets_single = []
model.eval()
with torch.no_grad():
for batch_idx, (images, targets) in enumerate(val_loader, 1):
images = list(image.to(device)
4o
mport os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch._version_) class FruitImagesDataset(torch.utils.data.Dataset): def _init_(self,
files_dir, width, height, transforms=None): self.transforms = transforms self.files_dir =
files_dir self.height = height self.width = width self.imgs = [image for image in
sorted(os.listdir(files_dir)) if image[-4:]=='.jpg'] self.classes = [_, 'apple','banana','orange'] def
_getitem_(self, idx): img_name = self.imgs[idx] image_path = os.path.join(self.files_dir,
img_name) img = cv2.imread(image_path) img_rgb = cv2.cvtColor(img,
cv2.COLOR_BGR2RGB).astype(np.float32) img_res = cv2.resize(img_rgb, (self.width,
self.height), cv2.INTER_AREA) img_res /= 255.0 annot_filename = img_name[:-4] + '.xml'
annot_file_path = os.path.join(self.files_dir, annot_filename) boxes = [] labels = [] tree =
et.parse(annot_file_path) root = tree.getroot() wt = img.shape[1] ht = img.shape[0] for
member in root.findall('object'): labels.append(self.classes.index(member.find('name').text))
xmin = int(member.find('bndbox').find('xmin').text) xmax =
int(member.find('bndbox').find('xmax').text) ymin =
int(member.find('bndbox').find('ymin').text) ymax =
int(member.find('bndbox').find('ymax').text) xmin_corr = (xmin/wt)*self.width xmax_corr =
(xmax/wt)*self.width ymin_corr = (ymin/ht)*self.height ymax_corr = (ymax/ht)*self.height
boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr]) boxes =
torch.as_tensor(boxes, dtype=torch.float32) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] -
boxes[:, 0]) iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64) labels =
torch.as_tensor(labels, dtype=torch.int64) target = {} target["boxes"] = boxes target["labels"]
= labels target["area"] = area target["iscrowd"] = iscrowd image_id = torch.tensor([idx])
target["image_id"] = image_id if self.transforms: sample = self.transforms(image = img_res,
bboxes = target['boxes'], labels = labels) img_res = sample['image'] target['boxes'] =
torch.Tensor(sample['bboxes']) return img_res, target def _len_(self): return len(self.imgs)
import matplotlib.patches as patches def plot_img_bbox(img, target): fig, a =
plt.subplots(1,1) fig.set_size_inches(5,5) if isinstance(img, torch.Tensor): img =
transforms.ToPILImage()(img).convert('RGB') a.imshow(img) boxes = target['boxes'] if
isinstance(boxes, torch.Tensor): boxes = boxes.tolist() for box in (boxes): x, y, width, height
= box[0], box[1], box[2]-box[0], box[3]-box[1] rect = patches.Rectangle((x, y), width, height,
linewidth = 2, edgecolor = 'r', facecolor = 'none') a.add_patch(rect) plt.show() files_dir =
'/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') files_dir
= '/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') from
torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn from
torchvision.models.detection.faster_rcnn import FastRCNNPredictor from
torchvision.models.detection.mask_rcnn import MaskRCNNPredictor from torchvision
import transforms def get_object_detection_model(num_classes): model =
fasterrcnn_resnet50_fpn(pretrained=True) in_features =
model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor =
FastRCNNPredictor(in_features, num_classes) return model def collate_fn(batch): return
tuple(zip(*batch)) def get_transform(train): if train: return
A.Compose([ A.HorizontalFlip(0.5), A.VerticalFlip(0.3), ToTensorV2(p=1.0) ],
bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) else: return
A.Compose([ ToTensorV2(p=1.0) ], bbox_params={'format': 'pascal_voc', 'label_fields':
['labels']}) dataset = FruitImagesDataset(files_dir, 224, 224,
transforms=get_transform(train=True)) dataset_test = FruitImagesDataset(test_dir, 224, 224,
transforms=get_transform(train=False)) torch.manual_seed(1) np.random.seed(1) indices =
np.random.permutation(len(dataset)).tolist() test_split = 0.1 tsize = len(dataset) -
int(len(dataset)*test_split) train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:]) train_loader =
torch.utils.data.DataLoader( train_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) val_loader = torch.utils.data.DataLoader( val_data, batch_size=5,
shuffle=True, num_workers=2, collate_fn=collate_fn) test_loader =
torch.utils.data.DataLoader( dataset_test, batch_size=5, shuffle=False, num_workers=2,
collate_fn=collate_fn) n_batches, n_batches_test = len(train_loader), len(test_loader) images,
targets = next(iter(train_loader)) print(targets[0]) plot_img_bbox(images[0], targets[0])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') num_classes
= 4 model = get_object_detection_model(num_classes) model.to(device) params = [p for p in
model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) import time !pip
install torchmetrics from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision() def train_model(model, data_loader=None,
num_epoch=10): for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 # loss_mask_accum = 0.0
loss_classifier_accum = 0.0 model.train() for batch_idx, (images, targets) in
enumerate(data_loader, 1): # Predict images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets] loss_dict = model(images,
targets) loss = sum(loss for loss in loss_dict.values()) # Backprop optimizer.zero_grad()
loss.backward() optimizer.step() # Logging # loss_mask = loss_dict['loss_mask'].item()
loss_accum += loss.item() # loss_mask_accum += loss_mask loss_classifier_accum +=
loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch {batch_idx:3d} /
{n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() # Train losses
train_loss = loss_accum / n_batches # train_loss_mask = loss_mask_accum / n_batches
train_loss_classifier = loss_classifier_accum / n_batches elapsed = time.time() - time_start
torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin") prefix = f"[Epoch {epoch:2d}
/ {num_epoch:2d}]" # print(prefix) # print(f"{prefix} Train mask-only loss:
{train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}") print(f"{prefix} Train
loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') preds_single = [] targets_single = [] for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) model.eval() with torch.no_grad(): pred = model(images)
preds_single.extend(pred) metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model def
train_model(model, train_loader, num_epochs): # Define your training loop here for epoch in
range(num_epochs): # Training steps for batch_idx, (data, target) in enumerate(train_loader):
# Forward pass output = model(data) # Compute loss loss = compute_loss(output, target) #
Backward pass and optimization optimizer.zero_grad() loss.backward() optimizer.step() #
Print training progress if batch_idx % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss:
{:.4f}' .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), loss.item())) return
model pip install torchmetrics import time import torch from torchmetrics.detection.mean_ap
import MeanAveragePrecision metric = MeanAveragePrecision() def train_model(model,
optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'): n_batches
= len(train_loader) for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 loss_classifier_accum = 0.0
model.train() for batch_idx, (images, targets) in enumerate(train_loader, 1): # Move data to
device images = list(image.to(device) for image in images) targets = [{k: v.to(device) for k, v
in t.items()} for t in targets] # Forward pass loss_dict = model(images, targets) loss =
sum(loss for loss in loss_dict.values()) # Backpropagation optimizer.zero_grad()
loss.backward() optimizer.step() # Logging loss_accum += loss.item() loss_classifier_accum
+= loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch
{batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() #
Compute average training loss train_loss = loss_accum / n_batches train_loss_classifier =
loss_classifier_accum / n_batches elapsed = time.time() - time_start print(f"[Epoch
{epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') #
Validation preds_single = [] targets_single = [] model.eval() with torch.no_grad(): for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) pred = model(images) preds_single.extend(pred) # Update
metric and compute mAP metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model # Instantiate your
model num_classes = 4 # Modify based on your dataset model =
get_object_detection_model(num_classes) model.to(device) # Define optimizer and
scheduler params = [p for p in model.parameters() if p.requires_grad] optimizer =
torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # Provide DataLoader
for training and validation train_loader = torch.utils.data.DataLoader(train_data,
batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn) val_loader =
torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) # Train the model num_epoch = 30 # Modify as needed model =
train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)
images, targets = next(iter(test_loader)) print(targets[0]) plot_img_bbox(images[0],
targets[0]) import torchvision def apply_threshold(prediction, threshold=0.6): index =
torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold) pred =
prediction.copy() pred['boxes'] = pred['boxes'][index] pred['scores'] = pred['scores'][index]
pred['labels'] = pred['labels'][index] return pred def apply_threshold_batch(prediction,
threshold=0.1): preds = [] for data in prediction: preds.append(apply_threshold(data,
threshold)) return preds model.to(device) model.eval() with torch.no_grad(): prediction =
model([images[0].to(device)]) print(prediction) filtered_preds =
apply_threshold_batch(prediction, threshold=0.1) filtered_preds import torch def
apply_threshold_batch(predictions, threshold=0.5): filtered_preds = [] for pred in predictions:
boxes = pred['boxes'] labels = pred['labels'] scores = pred['scores'] # Apply thresholding mask
= scores > threshold filtered_boxes = boxes[mask] filtered_labels = labels[mask]
filtered_scores = scores[mask] # Construct filtered prediction dictionary filtered_pred =
{ 'boxes': filtered_boxes, 'labels': filtered_labels, 'scores': filtered_scores }
filtered_preds.append(filtered_pred) return filtered_preds # Example prediction prediction =
[{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]), 'labels': torch.tensor([1,
2]), 'scores': torch.tensor([0.8, 0.6])}] # Apply threshold filtered_preds =
apply_threshold_batch(prediction, threshold=0.7) print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0]) import torchvision def
apply_threshold(prediction, threshold=0.6): index = torchvision.ops.nms(prediction['boxes'],
prediction['scores'], threshold) pred = prediction.copy() pred['boxes'] = pred['boxes'][index]
pred['scores'] = pred['scores'][index] pred['labels'] = pred['labels'][index] return pred def
apply_threshold_batch(prediction, threshold=0.1): preds = [] for data in prediction:
preds.append(apply_threshold(data, threshold)) return preds prepare ppt presentation for this
ChatGPT

To prepare a PowerPoint presentation for this code, you'll want to follow a structured
approach to present the key components and processes involved. Here's a suggested outline
for your presentation:

Slide 1: Title Slide

 Title: Object Detection with PyTorch

 Subtitle: Using Faster R-CNN and Mean Average Precision
 Your Name
 Date

Slide 2: Introduction

 Brief overview of object detection

 Importance in computer vision tasks
 Introduction to PyTorch for deep learning

Slide 3: Problem Statement

 Dataset: Fruit Images Dataset

 Goal: Detect and classify fruits in images
 Challenges: Varying sizes, multiple classes

Slide 4: Dataset and Preprocessing

 Description of the Fruit Images Dataset

 Preprocessing steps:
o Loading images and annotations
o Resizing images and bounding boxes
o Data augmentation

Slide 5: Model Architecture

 Introduction to Faster R-CNN

 Overview of the architecture
 Explanation of key components:
o Backbone (ResNet50)
o Region Proposal Network (RPN)
o Region of Interest (ROI) Pooling
o Fast R-CNN Head
o Loss functions

Slide 6: Training Setup

 Splitting dataset into train, validation, and test sets

 Data loaders for efficient training
 Model initialization and optimization:
o Optimizer: SGD with momentum
o Learning rate scheduler

Slide 7: Training Process

 Iterative training loop:

o Forward pass
o Loss computation
o Backpropagation
o Optimization
 Monitoring training loss and validation mAP
 Evaluation metrics: Mean Average Precision (mAP)

Slide 8: Results

 Sample predictions on test images

 Visualizations of detected fruits with bounding boxes
 Discussion on model performance and improvements

Slide 9: Thresholding and Non-Maximum Suppression

 Explanation of thresholding to filter detections

 Non-Maximum Suppression (NMS) for post-processing
 Implementation details in PyTorch

Slide 10: Conclusion

 Recap of key points covered

 Summary of achievements and challenges
 Future directions for improvement

Slide 11: References

 List of relevant papers, libraries, and resources

 Credits for datasets and code snippets used

Slide 12: Q&A

 Open floor for questions and discussions

This outline provides a structured framework for your presentation, covering the main aspects
of your object detection project implemented in PyTorch. You can customize each slide with
relevant content, code snippets, visualizations, and explanations to effectively communicate
your work to your audience.

3.5
Slide 2: Introduction Brief overview of object detection Importance in computer vision tasks
Introduction to PyTorch for deep learning Slide 3: Problem Statement Dataset: Fruit Images
Dataset Goal: Detect and classify fruits in images Challenges: Varying sizes, multiple classes
Slide 4: Dataset and Preprocessing Description of the Fruit Images Dataset Preprocessing
steps: Loading images and annotations Resizing images and bounding boxes Data
augmentation Slide 5: Model Architecture Introduction to Faster R-CNN Overview of the
architecture Explanation of key components: Backbone (ResNet50) Region Proposal
Network (RPN) Region of Interest (ROI) Pooling Fast R-CNN Head Loss functions Slide 6:
Training Setup Splitting dataset into train, validation, and test sets Data loaders for efficient
training Model initialization and optimization: Optimizer: SGD with momentum Learning
rate scheduler Slide 7: Training Process Iterative training loop: Forward pass Loss
computation Backpropagation Optimization Monitoring training loss and validation mAP
Evaluation metrics: Mean Average Precision (mAP) Slide 8: Results Sample predictions on
test images Visualizations of detected fruits with bounding boxes Discussion on model
performance and improvements Slide 9: Thresholding and Non-Maximum Suppression
Explanation of thresholding to filter detections Non-Maximum Suppression (NMS) for post-
processing Implementation details in PyTorch Slide 10: Conclusion Recap of key points
covered Summary of achievements and challenges Future directions for improvement Slide
11: References List of relevant papers, libraries, and resources Credits for datasets and code
snippets used Slide 12: Q&A Open floor for questions and discussions
ChatGPT

Your presentation outline looks great! It provides a clear structure for presenting the key
components of your object detection project implemented in PyTorch. Each slide covers
important aspects, from the introduction to the conclusion, and includes opportunities for
audience engagement during the Q&A session.

If you have any specific content or details you'd like to include on each slide or if you need
assistance with any particular part of the presentation, feel free to let me know!

3.5
mport os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch._version_) class FruitImagesDataset(torch.utils.data.Dataset): def _init_(self,
files_dir, width, height, transforms=None): self.transforms = transforms self.files_dir =
files_dir self.height = height self.width = width self.imgs = [image for image in
sorted(os.listdir(files_dir)) if image[-4:]=='.jpg'] self.classes = [_, 'apple','banana','orange'] def
_getitem_(self, idx): img_name = self.imgs[idx] image_path = os.path.join(self.files_dir,
img_name) img = cv2.imread(image_path) img_rgb = cv2.cvtColor(img,
cv2.COLOR_BGR2RGB).astype(np.float32) img_res = cv2.resize(img_rgb, (self.width,
self.height), cv2.INTER_AREA) img_res /= 255.0 annot_filename = img_name[:-4] + '.xml'
annot_file_path = os.path.join(self.files_dir, annot_filename) boxes = [] labels = [] tree =
et.parse(annot_file_path) root = tree.getroot() wt = img.shape[1] ht = img.shape[0] for
member in root.findall('object'): labels.append(self.classes.index(member.find('name').text))
xmin = int(member.find('bndbox').find('xmin').text) xmax =
int(member.find('bndbox').find('xmax').text) ymin =
int(member.find('bndbox').find('ymin').text) ymax =
int(member.find('bndbox').find('ymax').text) xmin_corr = (xmin/wt)*self.width xmax_corr =
(xmax/wt)*self.width ymin_corr = (ymin/ht)*self.height ymax_corr = (ymax/ht)*self.height
boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr]) boxes =
torch.as_tensor(boxes, dtype=torch.float32) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] -
boxes[:, 0]) iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64) labels =
torch.as_tensor(labels, dtype=torch.int64) target = {} target["boxes"] = boxes target["labels"]
= labels target["area"] = area target["iscrowd"] = iscrowd image_id = torch.tensor([idx])
target["image_id"] = image_id if self.transforms: sample = self.transforms(image = img_res,
bboxes = target['boxes'], labels = labels) img_res = sample['image'] target['boxes'] =
torch.Tensor(sample['bboxes']) return img_res, target def _len_(self): return len(self.imgs)
import matplotlib.patches as patches
from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision import transforms


def plot_img_bbox(img, target):
    """Show ``img`` with every box in ``target['boxes']`` outlined in red.

    Args:
        img: Image to display; a ``torch.Tensor`` is converted to an RGB PIL
            image first, anything else is handed to ``imshow`` unchanged.
        target: Mapping whose ``'boxes'`` entry holds ``[xmin, ymin, xmax,
            ymax]`` rows (tensor or nested list).
    """
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    if isinstance(img, torch.Tensor):
        img = transforms.ToPILImage()(img).convert('RGB')
    a.imshow(img)

    boxes = target['boxes']
    if isinstance(boxes, torch.Tensor):
        boxes = boxes.tolist()
    for box in boxes:
        # Rectangle wants a corner plus width/height, not two corners.
        x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
        rect = patches.Rectangle((x, y), width, height,
                                 linewidth=2, edgecolor='r', facecolor='none')
        a.add_patch(rect)
    plt.show()


files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'

dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')


def get_object_detection_model(num_classes):
    """Return a pre-trained Faster R-CNN (ResNet50-FPN) with a new box head.

    The box predictor is replaced by a ``FastRCNNPredictor`` sized for
    ``num_classes`` outputs; the rest of the network keeps its loaded weights.
    """
    # NOTE(review): newer torchvision releases prefer ``weights=...`` over
    # ``pretrained=True`` - confirm against the installed version.
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


def collate_fn(batch):
    """Turn a list of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
    return tuple(zip(*batch))


def get_transform(train):
    """Build the Albumentations pipeline for training or evaluation.

    Training adds random horizontal (p=0.5) and vertical (p=0.3) flips; both
    modes finish with ``ToTensorV2``. Boxes are declared as ``pascal_voc`` and
    travel together with the ``labels`` field.
    """
    bbox_params = {'format': 'pascal_voc', 'label_fields': ['labels']}
    if train:
        return A.Compose(
            [
                # Probabilities are passed by keyword: the first positional
                # parameter of these transforms is not ``p`` in every
                # Albumentations release.
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.3),
                ToTensorV2(p=1.0),
            ],
            bbox_params=bbox_params,
        )
    return A.Compose([ToTensorV2(p=1.0)], bbox_params=bbox_params)


dataset = FruitImagesDataset(files_dir, 224, 224,
                             transforms=get_transform(train=True))
# Same files as ``dataset`` but without augmentation, so that validation
# images are not randomly flipped.
dataset_val = FruitImagesDataset(files_dir, 224, 224,
                                 transforms=get_transform(train=False))
dataset_test = FruitImagesDataset(test_dir, 224, 224,
                                  transforms=get_transform(train=False))

# Fixed seeds keep the train/validation split reproducible.
torch.manual_seed(1)
np.random.seed(1)
indices = np.random.permutation(len(dataset)).tolist()

test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)

train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset_val, indices[tsize:])

train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=5, shuffle=True, num_workers=2,
    collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=5, shuffle=False, num_workers=2,
    collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(
    dataset_test, batch_size=5, shuffle=False, num_workers=2,
    collate_fn=collate_fn)

n_batches, n_batches_test = len(train_loader), len(test_loader)

# Sanity check: show one training sample with its boxes.
images, targets = next(iter(train_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 4
model = get_object_detection_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

import time

# Notebook shell command (not valid Python in a script):
#   !pip install torchmetrics
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()


def train_model(model, data_loader=None, num_epoch=10):
    """First training loop; superseded by the later ``train_model`` definitions.

    Uses the module-level ``optimizer``, ``lr_scheduler``, ``device``,
    ``n_batches``, ``val_loader`` and ``metric``, and saves the model's
    ``state_dict`` after every epoch.
    """
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")

        time_start = time.time()
        loss_accum = 0.0
        loss_classifier_accum = 0.0

        model.train()
        for batch_idx, (images, targets) in enumerate(data_loader, 1):
            # Predict
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            loss_accum += loss.item()
            loss_classifier_accum += loss_dict['loss_classifier'].item()

            if batch_idx % 500 == 0:
                print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")

        lr_scheduler.step()

        # Train losses
        train_loss = loss_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches

        elapsed = time.time() - time_start

        torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin")
        prefix = f"[Epoch {epoch:2d} / {num_epoch:2d}]"
        print(f"{prefix} Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

        preds_single = []
        targets_single = []
        for batch_idx, (images, targets) in enumerate(val_loader, 1):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            targets_single.extend(targets)

            model.eval()
            with torch.no_grad():
                pred = model(images)
            preds_single.extend(pred)

        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")

    return model


def train_model(model, train_loader, num_epochs):
    """Generic training-loop template; superseded by the definition below.

    NOTE(review): ``compute_loss`` is not defined in the visible code, so this
    version cannot run as written.
    """
    # Define your training loop here
    for epoch in range(num_epochs):
        # Training steps
        for batch_idx, (data, target) in enumerate(train_loader):
            # Forward pass
            output = model(data)
            # Compute loss
            loss = compute_loss(output, target)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Print training progress
            if batch_idx % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), loss.item()))
    return model


# Notebook command (not valid Python in a script):
#   pip install torchmetrics
import time
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()


def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                num_epoch=10, device='cuda'):
    """Train ``model`` and report the validation mAP after every epoch.

    Args:
        model: Detection model that returns a dict of losses in train mode
            and a list of prediction dicts in eval mode.
        optimizer: Optimizer stepped once per training batch.
        lr_scheduler: Scheduler stepped once per epoch.
        train_loader: Loader yielding ``(images, targets)`` training batches.
        val_loader: Loader yielding ``(images, targets)`` validation batches.
        num_epoch: Number of epochs to run.
        device: Device that images and targets are moved to.

    Returns:
        The trained model (the same object that was passed in).
    """
    n_batches = len(train_loader)

    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")

        time_start = time.time()
        loss_accum = 0.0

        model.train()
        for batch_idx, (images, targets) in enumerate(train_loader, 1):
            # Move data to device
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            loss_accum += loss.item()

            if batch_idx % 500 == 0:
                print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")

        lr_scheduler.step()

        # Compute average training loss
        train_loss = loss_accum / n_batches
        elapsed = time.time() - time_start
        print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

        # Validation
        preds_single = []
        targets_single = []
        model.eval()
        with torch.no_grad():
            for batch_idx, (images, targets) in enumerate(val_loader, 1):
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                targets_single.extend(targets)
                pred = model(images)
                preds_single.extend(pred)

        # Update metric and compute mAP. The module-level metric keeps state
        # between calls, so clear it first; otherwise each epoch's figure
        # would also include the predictions of all earlier epochs.
        metric.reset()
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        print(f"Val mAP: {batch_map['map']}")

    return model


# Instantiate your model
num_classes = 4  # Modify based on your dataset
model = get_object_detection_model(num_classes)
model.to(device)

# Define optimizer and scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Provide DataLoader for training and validation
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=5, shuffle=True, num_workers=2,
    collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=5, shuffle=False, num_workers=2,
    collate_fn=collate_fn)

# Train the model
num_epoch = 30  # Modify as needed
model = train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)

# Show one test sample with its ground-truth boxes.
images, targets = next(iter(test_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])

import torchvision


def apply_threshold(prediction, threshold=0.6):
    """Return a copy of ``prediction`` keeping only the boxes that survive NMS.

    ``threshold`` is passed to ``torchvision.ops.nms`` as its third argument
    (the IoU threshold); it is not a confidence cut-off.
    """
    index = torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold)
    pred = prediction.copy()
    pred['boxes'] = pred['boxes'][index]
    pred['scores'] = pred['scores'][index]
    pred['labels'] = pred['labels'][index]
    return pred


def apply_threshold_batch(prediction, threshold=0.1):
    """Apply ``apply_threshold`` to every prediction dict in ``prediction``."""
    preds = []
    for data in prediction:
        preds.append(apply_threshold(data, threshold))
    return preds


model.to(device)
model.eval()
with torch.no_grad():
    prediction = model([images[0].to(device)])
print(prediction)

filtered_preds = apply_threshold_batch(prediction, threshold=0.1)
# A bare expression only displays in a notebook; print so scripts show it too.
print(filtered_preds)

import torch


def apply_threshold_batch(predictions, threshold=0.5):
    """Keep only detections whose score is strictly greater than ``threshold``.

    This score-based variant temporarily replaces the NMS-based function of
    the same name; the NMS-based version is defined again further down.
    """
    filtered_preds = []
    for pred in predictions:
        boxes = pred['boxes']
        labels = pred['labels']
        scores = pred['scores']

        # Apply thresholding
        mask = scores > threshold
        filtered_boxes = boxes[mask]
        filtered_labels = labels[mask]
        filtered_scores = scores[mask]

        # Construct filtered prediction dictionary
        filtered_pred = {
            'boxes': filtered_boxes,
            'labels': filtered_labels,
            'scores': filtered_scores
        }
        filtered_preds.append(filtered_pred)
    return filtered_preds


# Example prediction
prediction = [{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]),
               'labels': torch.tensor([1, 2]),
               'scores': torch.tensor([0.8, 0.6])}]

# Apply threshold
filtered_preds = apply_threshold_batch(prediction, threshold=0.7)
print(filtered_preds)

# NOTE(review): this draws the hand-written example boxes above on a real
# test image, not that image's own predictions - confirm this is intended.
plot_img_bbox(images[2], filtered_preds[0])

import torchvision


def apply_threshold(prediction, threshold=0.6):
    """Return a copy of ``prediction`` keeping only the boxes that survive NMS."""
    index = torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold)
    pred = prediction.copy()
    pred['boxes'] = pred['boxes'][index]
    pred['scores'] = pred['scores'][index]
    pred['labels'] = pred['labels'][index]
    return pred


def apply_threshold_batch(prediction, threshold=0.1):
    """Apply the NMS-based ``apply_threshold`` to every prediction dict."""
    preds = []
    for data in prediction:
        preds.append(apply_threshold(data, threshold))
    return preds


# based on this explain what we are doing in this project and concept of this
ChatGPT

In this project, we're implementing an object detection system using PyTorch. The goal is to
detect and classify fruits in images, which involves identifying the presence of fruit instances
and labeling them with their corresponding classes (e.g., apple, banana, orange).

Here's a breakdown of what we're doing:

1. Dataset Preparation: We're using the Fruit Images Dataset, which contains images
of various fruits along with annotations specifying the bounding boxes and class
labels of the fruits in the images. We preprocess the dataset by loading images,
resizing them, and extracting bounding box annotations.
2. Model Selection: We're using the Faster R-CNN architecture, a popular choice for
object detection tasks. This architecture consists of a backbone network (ResNet50), a
Region Proposal Network (RPN), and a Region of Interest (ROI) Pooling layer,
among other components.
3. Training Setup: We split the dataset into training, validation, and test sets. We define
data loaders for efficient training and validation. The model is initialized with pre-
trained weights and optimized using the SGD optimizer with momentum. We also set
up a learning rate scheduler to adjust the learning rate during training.
4. Training Process: We train the model using an iterative training loop. In each
iteration, we perform a forward pass to compute the loss, followed by
backpropagation and optimization. We monitor the training loss and validation mean
Average Precision (mAP) as evaluation metrics.
5. Results Analysis: We analyze the results by visualizing sample predictions on test
images. We discuss the model's performance and potential improvements.
6. Thresholding and Non-Maximum Suppression: We apply thresholding to filter out
detections with low confidence scores and use Non-Maximum Suppression (NMS) for
post-processing to remove redundant bounding boxes.

07 - Assessmen (2) - JupyterLab
No ratings yet
07 - Assessmen (2) - JupyterLab
10 pages
DLV Lab Manual Print
No ratings yet
DLV Lab Manual Print
29 pages
4-Channel YOLO Training Guide For RGB+IR Drone Detection
No ratings yet
4-Channel YOLO Training Guide For RGB+IR Drone Detection
22 pages
Domain PR Check List3!!! (8647)
No ratings yet
Domain PR Check List3!!! (8647)
304 pages
BLDD VIT ResNet50v2 CustomCNN
No ratings yet
BLDD VIT ResNet50v2 CustomCNN
38 pages
Task1 Lakshya - Ipynb - Colab
No ratings yet
Task1 Lakshya - Ipynb - Colab
33 pages
MLP Pytorch Softmax Crossentr
No ratings yet
MLP Pytorch Softmax Crossentr
20 pages
PINN 1DBurgers
No ratings yet
PINN 1DBurgers
19 pages
Code
No ratings yet
Code
11 pages
Train
No ratings yet
Train
4 pages
Manual Deh 2250ub
0% (1)
Manual Deh 2250ub
112 pages
Report
No ratings yet
Report
15 pages
Lab 9
No ratings yet
Lab 9
29 pages
Skill 7
No ratings yet
Skill 7
11 pages
Fatima Binte Aqeel AI Theory Assignment 3
No ratings yet
Fatima Binte Aqeel AI Theory Assignment 3
7 pages
Softmax Regression Scratch
No ratings yet
Softmax Regression Scratch
5 pages
MLP Pytorch Sigmoid Mse
No ratings yet
MLP Pytorch Sigmoid Mse
20 pages
Charotar University of Science and Technology
No ratings yet
Charotar University of Science and Technology
39 pages
MobileNetV2 Code
No ratings yet
MobileNetV2 Code
3 pages
Deep Learning Programs Updated
No ratings yet
Deep Learning Programs Updated
24 pages
Week 7 - Mnist-Mlp
No ratings yet
Week 7 - Mnist-Mlp
7 pages
Lecture 3 HandDigitRecognition
No ratings yet
Lecture 3 HandDigitRecognition
2 pages
Document 2
No ratings yet
Document 2
8 pages
Faster R-CNN
No ratings yet
Faster R-CNN
20 pages
Web Methods EbXML Module Installation and User's Guide 7.1 SP1
100% (1)
Web Methods EbXML Module Installation and User's Guide 7.1 SP1
154 pages
Yealink T55A Teams Phone Edition User Guide V15.85
No ratings yet
Yealink T55A Teams Phone Edition User Guide V15.85
51 pages
CVDL Tae 63
No ratings yet
CVDL Tae 63
9 pages
Aai 04
No ratings yet
Aai 04
7 pages
Training A Classifier - PyTorch Tutorials 2.3.0+cu121 Documentation
No ratings yet
Training A Classifier - PyTorch Tutorials 2.3.0+cu121 Documentation
8 pages
DL 8
No ratings yet
DL 8
6 pages
Softmax Regression Mnist
No ratings yet
Softmax Regression Mnist
3 pages
Assignment No 13
No ratings yet
Assignment No 13
3 pages
Cifar 100 Dataset
No ratings yet
Cifar 100 Dataset
4 pages
(Deep Learning Using PyTorch) (Cheatsheet)
No ratings yet
(Deep Learning Using PyTorch) (Cheatsheet)
7 pages
Notebook - Agave Plant Maturation Model Inference and Testing
No ratings yet
Notebook - Agave Plant Maturation Model Inference and Testing
7 pages
Lab 1 - Harshil - Parmar
No ratings yet
Lab 1 - Harshil - Parmar
2 pages
Deep Learning Experiments
No ratings yet
Deep Learning Experiments
42 pages
La Dificultad de Escribir Un Ensayo Persuasivo
100% (1)
La Dificultad de Escribir Un Ensayo Persuasivo
7 pages
NN From Scratch
No ratings yet
NN From Scratch
5 pages
DL Practical 6,7 Outputs
No ratings yet
DL Practical 6,7 Outputs
9 pages
Major PPT
No ratings yet
Major PPT
18 pages
Tensorflow 2 Quickstart
No ratings yet
Tensorflow 2 Quickstart
2 pages
Rovertown 1
No ratings yet
Rovertown 1
47 pages
PyTorch CrashCourse
No ratings yet
PyTorch CrashCourse
16 pages
Detect
No ratings yet
Detect
6 pages
JD-Ungana 1326 - Annexe 1-2
No ratings yet
JD-Ungana 1326 - Annexe 1-2
53 pages
Main Python Code
No ratings yet
Main Python Code
31 pages
Transfer Learning For Image Classification in Pytorch
No ratings yet
Transfer Learning For Image Classification in Pytorch
13 pages
Ccnet Only
No ratings yet
Ccnet Only
6 pages
Vit32 GPTMD
No ratings yet
Vit32 GPTMD
6 pages
Val
No ratings yet
Val
9 pages
CV Lab Final AwaisKhan EE A
No ratings yet
CV Lab Final AwaisKhan EE A
7 pages
Propagation Delay and Short-Circuit Power Dissipation Modeling of The CMOS Inverter
No ratings yet
Propagation Delay and Short-Circuit Power Dissipation Modeling of The CMOS Inverter
12 pages
CIFAR - 10 - Dataset - Using - CNN - Aniiiii - HTML
No ratings yet
CIFAR - 10 - Dataset - Using - CNN - Aniiiii - HTML
8 pages
Semtech Broadcast SelectorGuide 2021 Web
No ratings yet
Semtech Broadcast SelectorGuide 2021 Web
12 pages
Document
No ratings yet
Document
1 page
Assignment3 AL
No ratings yet
Assignment3 AL
23 pages
Code
No ratings yet
Code
4 pages
Os Installation
No ratings yet
Os Installation
16 pages
Assignment - 4 - Risk Response, Contingency and Control
No ratings yet
Assignment - 4 - Risk Response, Contingency and Control
4 pages
Ніч яка місячна Sheet music for Piano, Vocals (Piano-Voice)
No ratings yet
Ніч яка місячна Sheet music for Piano, Vocals (Piano-Voice)
1 page
361 Project Code
No ratings yet
361 Project Code
10 pages
Ajay Asthana: Greater Atlanta Area SR SAP Basis/SAP HANA Consultant at SAP
No ratings yet
Ajay Asthana: Greater Atlanta Area SR SAP Basis/SAP HANA Consultant at SAP
6 pages
Detection ORIGINAL
No ratings yet
Detection ORIGINAL
3 pages
DETECTCAMERA
No ratings yet
DETECTCAMERA
3 pages
Belarc Advisor - Computer Profile
No ratings yet
Belarc Advisor - Computer Profile
3 pages
Healthcare ERP Project Success: It's All About Avoiding Missteps
No ratings yet
Healthcare ERP Project Success: It's All About Avoiding Missteps
5 pages
Ilovepdf Merged
No ratings yet
Ilovepdf Merged
10 pages
Dinushasan Courseproject04: Sign in
No ratings yet
Dinushasan Courseproject04: Sign in
19 pages
Keras
No ratings yet
Keras
4 pages
Assignment 3 DS5620
No ratings yet
Assignment 3 DS5620
11 pages
Intro To Pytorch
No ratings yet
Intro To Pytorch
12 pages
F20 HMGT 6335 OPRE 6332 Spreadsheet Modeling SYLLABUS
No ratings yet
F20 HMGT 6335 OPRE 6332 Spreadsheet Modeling SYLLABUS
9 pages
Yescool Amoi-C20 User Manual-English
No ratings yet
Yescool Amoi-C20 User Manual-English
30 pages
EE3706 - Chapter 6 - Capacitors and Inductors
No ratings yet
EE3706 - Chapter 6 - Capacitors and Inductors
27 pages
PyTorch Cheat Sheet & Quick Reference
No ratings yet
PyTorch Cheat Sheet & Quick Reference
6 pages
32 Secret Combinations On Your Keyboard
100% (1)
32 Secret Combinations On Your Keyboard
2 pages
Lab 4-Image Segmentation Using U-Net
No ratings yet
Lab 4-Image Segmentation Using U-Net
9 pages
Predict
No ratings yet
Predict
3 pages
SJ XJ Pump Manual
100% (1)
SJ XJ Pump Manual
18 pages
Object Detection Webcam
No ratings yet
Object Detection Webcam
3 pages
TensorFlow深度学习项目实战: Chinese Edition
From Everand
TensorFlow深度学习项目实战: Chinese Edition
Posts & Telecom Press
No ratings yet
US IT Recruiting Training Material - Road To US Staffing and USA
No ratings yet
US IT Recruiting Training Material - Road To US Staffing and USA
17 pages
HSPA - High Speed Packet Access Tutorial
No ratings yet
HSPA - High Speed Packet Access Tutorial
21 pages
Object Detection Webcam
No ratings yet
Object Detection Webcam
3 pages
Form IEPF 4 - 2017 18 1
No ratings yet
Form IEPF 4 - 2017 18 1
6 pages
MM17 Custom Fields Update MVKE Table
0% (1)
MM17 Custom Fields Update MVKE Table
10 pages
Computer Project: For Loop
No ratings yet
Computer Project: For Loop
11 pages
Bus Bus Bus
No ratings yet
Bus Bus Bus
4 pages
Broch Samcef Mecano PDF
No ratings yet
Broch Samcef Mecano PDF
4 pages

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Presentation

Uploaded by

Presentation

Uploaded by

import os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.

Certainly! Below is a comprehensive PowerPoint presentation outline based on the given

Slide 1: Title Slide

Subtitle: Using PyTorch and Faster R-CNN

Presenter: [Your Name]

Slide 3: Dataset Preparation

 Data Directory: Contains images and corresponding annotation files.

Slide 4: Dataset Class

 __init__: Initializes the dataset.

Slide 5: Data Transformation and Augmentation

 Training Transformations: Horizontal and vertical flips.

Slide 6: Data Loading

 Training Data: Split into training and validation sets.

Slide 7: Model Definition

 Pre-trained: Uses ResNet50 with Feature Pyramid Network (FPN).

Slide 8: Training Loop

 Epochs: Iterate over the dataset.

Slide 9: Evaluation Metrics

 Metric: Used to evaluate object detection models.

 Loss: Training loss over epochs.

Slide 11: Visualization

Slide 12: Applying Thresholds

 Non-Maximum Suppression (NMS): Filters overlapping bounding boxes based on

Slide 13: Example Predictions

 Shows images with bounding boxes drawn around detected objects.

Slide 14: Conclusion

 Successfully trained an object detection model for fruits.

 Further fine-tuning of the model.

Slide 15: Q&A

 Open floor for questions from the audience.

Slide 1: Title Slide

Presenter: [Your Name]

Slide 3: Dataset Preparation

Slide 4: Dataset Class

Slide 5: Data Transformation and Augmentation

Slide 6: Data Loading

Slide 7: Model Definition

Slide 8: Training Loop

Slide 9: Evaluation Metrics

Slide 10: Training Results

Slide 11: Visualization

Slide 12: Applying Thresholds

 Non-Maximum Suppression (NMS):

Slide 13: Example Predictions

Slide 14: Conclusion

Slide 15: Q&A

 os: File handling.

def __getitem__(self, idx):

annot_filename = img_name[:-4] + '.xml'

boxes = torch.as_tensor(boxes, dtype=torch.float32)

return img_res, target

def plot_img_bbox(img, target):

 Initializes the training and test datasets.

Faster R-CNN Model

Data Augmentation and Transformation

Training the Model

def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,

for batch_idx, (images, targets) in enumerate(train_loader, 1):

Slide 1: Title Slide

 Title: Object Detection with PyTorch

 Brief overview of object detection

Slide 3: Problem Statement

 Dataset: Fruit Images Dataset

Slide 4: Dataset and Preprocessing

 Description of the Fruit Images Dataset

Slide 5: Model Architecture

 Introduction to Faster R-CNN

Slide 6: Training Setup

 Splitting dataset into train, validation, and test sets

Slide 7: Training Process

 Iterative training loop:

 Sample predictions on test images

Slide 9: Thresholding and Non-Maximum Suppression

 Explanation of thresholding to filter detections

Slide 10: Conclusion

 Recap of key points covered

 __init__: Initializes the dataset.

def getitem(self, idx):