0% found this document useful (0 votes)
24 views31 pages

Presentation

look at this

Uploaded by

Shubhanka A
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views31 pages

Presentation

look at this

Uploaded by

Shubhanka A
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 31

import os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.

etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch.__version__)


class FruitImagesDataset(torch.utils.data.Dataset):
    """Fruit detection dataset read from a directory of images and XML files.

    Every ``.jpg`` file in ``files_dir`` is one sample; its annotation is the
    file with the same stem and an ``.xml`` extension, containing ``object``
    elements with a ``name`` and a ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``).

    Each item is ``(image, target)``: the image is converted to RGB, resized
    to ``width`` x ``height`` and scaled to ``[0, 1]``; ``target`` is a dict
    with ``boxes`` (rescaled to the resized image), ``labels``, ``area``,
    ``iscrowd`` and ``image_id``.
    """

    def __init__(self, files_dir, width, height, transforms=None):
        """Index the images found in ``files_dir``.

        Args:
            files_dir: Directory holding the ``.jpg`` images and ``.xml`` files.
            width: Width every image is resized to.
            height: Height every image is resized to.
            transforms: Optional callable invoked as
                ``transforms(image=..., bboxes=..., labels=...)`` that returns
                a mapping with ``image`` and ``bboxes`` entries.
        """
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width
        # Sorted so that an index always maps to the same file.
        self.imgs = [name for name in sorted(os.listdir(files_dir))
                     if name.endswith('.jpg')]
        # Index 0 is an explicit placeholder so the fruit classes get ids 1..3
        # (torchvision detection models reserve label 0 for the background).
        self.classes = ['__background__', 'apple', 'banana', 'orange']

    def __getitem__(self, idx):
        """Load, resize and annotate the sample at position ``idx``.

        Returns:
            ``(image, target)`` as described in the class docstring.

        Raises:
            FileNotFoundError: If the image file cannot be read.
            ValueError: If an annotation names a class missing from
                ``self.classes`` (raised by ``list.index``).
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f"could not read image: {image_path}")
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The interpolation flag has to be a keyword argument: the third
        # positional parameter of cv2.resize is the output array `dst`.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        annot_file_path = os.path.join(self.files_dir, img_name[:-4] + '.xml')
        root = et.parse(annot_file_path).getroot()

        # Original image size, needed to rescale boxes to the resized image.
        ht, wt = img.shape[:2]

        boxes = []
        labels = []
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            boxes.append([
                (xmin / wt) * self.width,
                (ymin / ht) * self.height,
                (xmax / wt) * self.width,
                (ymax / ht) * self.height,
            ])

        # reshape keeps the (N, 4) layout even when the image has no objects,
        # so the column indexing below does not fail on an empty tensor.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((boxes.shape[0],), dtype=torch.int64),
            "image_id": torch.tensor([idx]),
        }

        if self.transforms:
            sample = self.transforms(image=img_res,
                                     bboxes=target['boxes'],
                                     labels=labels)
            img_res = sample['image']
            target['boxes'] = torch.as_tensor(
                sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

        return img_res, target

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``files_dir``."""
        return len(self.imgs)
import matplotlib.patches as patches def plot_img_bbox(img, target): fig, a =
plt.subplots(1,1) fig.set_size_inches(5,5) if isinstance(img, torch.Tensor): img =
transforms.ToPILImage()(img).convert('RGB') a.imshow(img) boxes = target['boxes'] if
isinstance(boxes, torch.Tensor): boxes = boxes.tolist() for box in (boxes): x, y, width, height
= box[0], box[1], box[2]-box[0], box[3]-box[1] rect = patches.Rectangle((x, y), width, height,
linewidth = 2, edgecolor = 'r', facecolor = 'none') a.add_patch(rect) plt.show() files_dir =
'/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') files_dir
= '/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') from
torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn from
torchvision.models.detection.faster_rcnn import FastRCNNPredictor from
torchvision.models.detection.mask_rcnn import MaskRCNNPredictor from torchvision
import transforms def get_object_detection_model(num_classes): model =
fasterrcnn_resnet50_fpn(pretrained=True) in_features =
model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor =
FastRCNNPredictor(in_features, num_classes) return model def collate_fn(batch): return
tuple(zip(*batch)) def get_transform(train): if train: return
A.Compose([ A.HorizontalFlip(0.5), A.VerticalFlip(0.3), ToTensorV2(p=1.0) ],
bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) else: return
A.Compose([ ToTensorV2(p=1.0) ], bbox_params={'format': 'pascal_voc', 'label_fields':
['labels']}) dataset = FruitImagesDataset(files_dir, 224, 224,
transforms=get_transform(train=True)) dataset_test = FruitImagesDataset(test_dir, 224, 224,
transforms=get_transform(train=False)) torch.manual_seed(1) np.random.seed(1) indices =
np.random.permutation(len(dataset)).tolist() test_split = 0.1 tsize = len(dataset) -
int(len(dataset)*test_split) train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:]) train_loader =
torch.utils.data.DataLoader( train_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) val_loader = torch.utils.data.DataLoader( val_data, batch_size=5,
shuffle=True, num_workers=2, collate_fn=collate_fn) test_loader =
torch.utils.data.DataLoader( dataset_test, batch_size=5, shuffle=False, num_workers=2,
collate_fn=collate_fn) n_batches, n_batches_test = len(train_loader), len(test_loader) images,
targets = next(iter(train_loader)) print(targets[0]) plot_img_bbox(images[0], targets[0])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') num_classes
= 4 model = get_object_detection_model(num_classes) model.to(device) params = [p for p in
model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) import time !pip
install torchmetrics from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision() def train_model(model, data_loader=None,
num_epoch=10): for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 # loss_mask_accum = 0.0
loss_classifier_accum = 0.0 model.train() for batch_idx, (images, targets) in
enumerate(data_loader, 1): # Predict images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets] loss_dict = model(images,
targets) loss = sum(loss for loss in loss_dict.values()) # Backprop optimizer.zero_grad()
loss.backward() optimizer.step() # Logging # loss_mask = loss_dict['loss_mask'].item()
loss_accum += loss.item() # loss_mask_accum += loss_mask loss_classifier_accum +=
loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch {batch_idx:3d} /
{n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() # Train losses
train_loss = loss_accum / n_batches # train_loss_mask = loss_mask_accum / n_batches
train_loss_classifier = loss_classifier_accum / n_batches elapsed = time.time() - time_start
torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin") prefix = f"[Epoch {epoch:2d}
/ {num_epoch:2d}]" # print(prefix) # print(f"{prefix} Train mask-only loss:
{train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}") print(f"{prefix} Train
loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') preds_single = [] targets_single = [] for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) model.eval() with torch.no_grad(): pred = model(images)
preds_single.extend(pred) metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model def
train_model(model, train_loader, num_epochs): # Define your training loop here for epoch in
range(num_epochs): # Training steps for batch_idx, (data, target) in enumerate(train_loader):
# Forward pass output = model(data) # Compute loss loss = compute_loss(output, target) #
Backward pass and optimization optimizer.zero_grad() loss.backward() optimizer.step() #
Print training progress if batch_idx % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss:
{:.4f}' .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), loss.item())) return
model pip install torchmetrics import time import torch from torchmetrics.detection.mean_ap
import MeanAveragePrecision metric = MeanAveragePrecision() def train_model(model,
optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'): n_batches
= len(train_loader) for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 loss_classifier_accum = 0.0
model.train() for batch_idx, (images, targets) in enumerate(train_loader, 1): # Move data to
device images = list(image.to(device) for image in images) targets = [{k: v.to(device) for k, v
in t.items()} for t in targets] # Forward pass loss_dict = model(images, targets) loss =
sum(loss for loss in loss_dict.values()) # Backpropagation optimizer.zero_grad()
loss.backward() optimizer.step() # Logging loss_accum += loss.item() loss_classifier_accum
+= loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch
{batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() #
Compute average training loss train_loss = loss_accum / n_batches train_loss_classifier =
loss_classifier_accum / n_batches elapsed = time.time() - time_start print(f"[Epoch
{epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') #
Validation preds_single = [] targets_single = [] model.eval() with torch.no_grad(): for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) pred = model(images) preds_single.extend(pred) # Update
metric and compute mAP metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model # Instantiate your
model num_classes = 4 # Modify based on your dataset model =
get_object_detection_model(num_classes) model.to(device) # Define optimizer and
scheduler params = [p for p in model.parameters() if p.requires_grad] optimizer =
torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # Provide DataLoader
for training and validation train_loader = torch.utils.data.DataLoader(train_data,
batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn) val_loader =
torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) # Train the model num_epoch = 30 # Modify as needed model =
train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)
images, targets = next(iter(test_loader)) print(targets[0]) plot_img_bbox(images[0],
targets[0]) import torchvision def apply_threshold(prediction, threshold=0.6): index =
torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold) pred =
prediction.copy() pred['boxes'] = pred['boxes'][index] pred['scores'] = pred['scores'][index]
pred['labels'] = pred['labels'][index] return pred def apply_threshold_batch(prediction,
threshold=0.1): preds = [] for data in prediction: preds.append(apply_threshold(data,
threshold)) return preds model.to(device) model.eval() with torch.no_grad(): prediction =
model([images[0].to(device)]) print(prediction) filtered_preds =
apply_threshold_batch(prediction, threshold=0.1) filtered_preds import torch def
apply_threshold_batch(predictions, threshold=0.5): filtered_preds = [] for pred in predictions:
boxes = pred['boxes'] labels = pred['labels'] scores = pred['scores'] # Apply thresholding mask
= scores > threshold filtered_boxes = boxes[mask] filtered_labels = labels[mask]
filtered_scores = scores[mask] # Construct filtered prediction dictionary filtered_pred =
{ 'boxes': filtered_boxes, 'labels': filtered_labels, 'scores': filtered_scores }
filtered_preds.append(filtered_pred) return filtered_preds # Example prediction prediction =
[{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]), 'labels': torch.tensor([1,
2]), 'scores': torch.tensor([0.8, 0.6])}] # Apply threshold filtered_preds =
apply_threshold_batch(prediction, threshold=0.7) print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0]) import torchvision def
apply_threshold(prediction, threshold=0.6): index = torchvision.ops.nms(prediction['boxes'],
prediction['scores'], threshold) pred = prediction.copy() pred['boxes'] = pred['boxes'][index]
pred['scores'] = pred['scores'][index] pred['labels'] = pred['labels'][index] return pred def
apply_threshold_batch(prediction, threshold=0.1): preds = [] for data in prediction:
preds.append(apply_threshold(data, threshold)) return preds prepare complete ppt
presentation for this with full information
ChatGPT

Certainly! Below is a comprehensive PowerPoint presentation outline based on the given


code and the training and evaluation of the Fruit Object Detection Model.

Slide 1: Title Slide


Title: Fruit Object Detection Model

Subtitle: Using PyTorch and Faster R-CNN

Presenter: [Your Name]

Date: [Date]

Slide 2: Introduction
Objectives

• Objective: To build and train an object detection model for fruits using Faster R-CNN.
 Dataset: Contains images of apples, bananas, and oranges.

Slide 3: Dataset Preparation


Steps

 Data Directory: Contains images and corresponding annotation files.


 Annotations: Stored in XML format following Pascal VOC structure.
 Classes: apple, banana, orange.

Slide 4: Dataset Class


Code Overview
python
Copy code
class FruitImagesDataset(torch.utils.data.Dataset):
    """Slide stub of the fruit dataset class; every method body is elided."""

    def __init__(self, files_dir, width, height, transforms=None):
        """Placeholder for dataset initialisation (body elided on the slide)."""
        ...

    def __getitem__(self, idx):
        """Placeholder for loading the sample at ``idx`` (body elided)."""
        ...

    def __len__(self):
        """Placeholder for the image count (body elided)."""
        ...

Key Functions

 __init__: Initializes the dataset.


 __getitem__: Retrieves and preprocesses images and annotations.
 __len__: Returns the total number of images.

Slide 5: Data Transformation and Augmentation


Albumentations Library

 Training Transformations: Horizontal and vertical flips.


 Validation Transformations: Conversion to tensor only.

Code Snippet
python
Copy code
def get_transform(train):
    """Return the Albumentations pipeline for training or for evaluation.

    The transform lists are elided (``[...]``) on this slide. Both branches
    declare boxes in ``pascal_voc`` format with class ids carried in the
    ``labels`` field.
    """
    if train:
        return A.Compose([...], bbox_params={'format': 'pascal_voc',
                                             'label_fields': ['labels']})
    else:
        return A.Compose([...], bbox_params={'format': 'pascal_voc',
                                             'label_fields': ['labels']})

Slide 6: Data Loading


Data Loaders

 Training Data: Split into training and validation sets.


• Validation Data: 10% hold-out split from the training set; the test set is a separate dataset.
 Batch Size: 5

Code Snippet
python
Copy code
# `...` stands for the dataset and loader options that are elided on this slide.
train_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)

Slide 7: Model Definition


Faster R-CNN Model

 Pre-trained: Uses ResNet50 with Feature Pyramid Network (FPN).


 Customization: Adjusts the classifier head for the number of fruit classes.

Code Snippet
python
Copy code
def get_object_detection_model(num_classes):
    """Build a Faster R-CNN (ResNet50-FPN) whose box head predicts ``num_classes``.

    Args:
        num_classes: Number of output classes for the replacement predictor.

    Returns:
        The model with ``roi_heads.box_predictor`` swapped for a fresh
        ``FastRCNNPredictor``.
    """
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in
    # favour of `weights=` -- confirm against the installed version.
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    # Reuse the input width of the existing classification layer.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features,
                                                      num_classes)
    return model

Slide 8: Training Loop


Training Procedure

 Epochs: Iterate over the dataset.


 Loss Calculation: Sum of all losses from the model.
 Optimizer: Stochastic Gradient Descent (SGD).

Code Snippet
python
Copy code
def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                num_epoch=10, device='cuda'):
    """Iterate over ``num_epoch`` epochs and return ``model``.

    The per-epoch training and validation work is elided (``...``) on this
    slide, so the remaining parameters are unused in this snippet.
    """
    for epoch in range(1, num_epoch + 1):
        ...
    return model

Slide 9: Evaluation Metrics


Mean Average Precision (mAP)

 Metric: Used to evaluate object detection models.


 Implementation: Uses torchmetrics.

Code Snippet
python
Copy code
import torchmetrics
# `import torchmetrics` alone does not bind MeanAveragePrecision; import the
# class by name so the next line does not raise NameError.
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()
Slide 10: Training Results
Loss and mAP

 Loss: Training loss over epochs.


 mAP: Validation mean Average Precision.

Example Output
plaintext
Copy code
[Epoch 10 / 30] Train loss: 0.123 [300 secs] | Val mAP: 0.78

Slide 11: Visualization


Plotting Bounding Boxes

 Function: plot_img_bbox
 Visualization: Shows bounding boxes on images.

Code Snippet
python
Copy code
def plot_img_bbox(img, target):
    """Placeholder for the bounding-box plotting helper (body elided on the slide)."""
    ...

Slide 12: Applying Thresholds


Filtering Predictions

 Non-Maximum Suppression (NMS): Filters overlapping bounding boxes based on


scores.

Code Snippet
python
Copy code
def apply_threshold(prediction, threshold=0.6):
    """Placeholder for filtering a single prediction (body elided on the slide)."""
    ...


def apply_threshold_batch(prediction, threshold=0.1):
    """Placeholder for filtering a list of predictions (body elided on the slide)."""
    ...

Slide 13: Example Predictions


Sample Output
 Original Predictions: Before applying threshold.
 Filtered Predictions: After applying threshold.

Visualization

 Shows images with bounding boxes drawn around detected objects.

Slide 14: Conclusion


Summary

 Successfully trained an object detection model for fruits.


 Evaluated the model using mAP.
 Demonstrated the effectiveness of applying thresholds.

Next Steps

 Further fine-tuning of the model.


 Exploring more data augmentation techniques.

Slide 15: Q&A


Questions and Answers

 Open floor for questions from the audience.

Feel free to adjust and add more details as necessary to fit the presentation needs.

4o
please expand the bullet points for all slides
2/2
ChatGPT

Certainly! Here's an expanded version of the PowerPoint presentation with detailed bullet
points for each slide.

Slide 1: Title Slide


Title: Fruit Object Detection Model
Subtitle: Using PyTorch and Faster R-CNN

Presenter: [Your Name]

Date: [Date]

Slide 2: Introduction
Objectives

 Objective:
o Develop a robust object detection model to identify and classify fruits.
o Utilize the Faster R-CNN framework for detection tasks.
 Dataset:
o A collection of images containing apples, bananas, and oranges.
o Each image has corresponding annotation files marking the bounding boxes
and labels for the fruits.

Slide 3: Dataset Preparation


Steps

 Data Directory:
o Organize images and annotations within a specific directory structure.
o Images are stored in JPG format.
 Annotations:
o Annotations provided in XML format.
o XML files adhere to the Pascal VOC structure, containing details about
bounding boxes and labels.
 Classes:
o Defined classes: apple, banana, orange.
o The class index mapping: {0: background, 1: apple, 2: banana, 3: orange}.

Slide 4: Dataset Class


Code Overview
python
Copy code
class FruitImagesDataset(torch.utils.data.Dataset):
    """Slide stub of the fruit dataset class; every method body is elided."""

    def __init__(self, files_dir, width, height, transforms=None):
        """Placeholder for dataset initialisation (body elided on the slide)."""
        ...

    def __getitem__(self, idx):
        """Placeholder for loading the sample at ``idx`` (body elided)."""
        ...

    def __len__(self):
        """Placeholder for the image count (body elided)."""
        ...

Key Functions

 __init__:
o Initialize the dataset with the image directory, target image dimensions, and
transformations.
o Load image filenames and define class labels.
 __getitem__:
o Load and preprocess image and corresponding annotation for a given index.
o Convert bounding box coordinates to match the target image dimensions.
 __len__:
o Return the total number of images in the dataset.

Slide 5: Data Transformation and Augmentation


Albumentations Library

 Training Transformations:
o Apply random horizontal and vertical flips to augment the dataset.
o Convert images to tensor format with ToTensorV2 (pixel values are already scaled to [0, 1] by the dataset class).
 Validation Transformations:
o Convert images to tensor format with ToTensorV2 only, without augmentation (scaling to [0, 1] happens in the dataset class).

Code Snippet
python
Copy code
def get_transform(train):
    """Return the Albumentations pipeline for training or for evaluation.

    The transform lists are elided (``[...]``) on this slide. Both branches
    declare boxes in ``pascal_voc`` format with class ids carried in the
    ``labels`` field.
    """
    if train:
        return A.Compose([...], bbox_params={'format': 'pascal_voc',
                                             'label_fields': ['labels']})
    else:
        return A.Compose([...], bbox_params={'format': 'pascal_voc',
                                             'label_fields': ['labels']})

Slide 6: Data Loading


Data Loaders

 Training Data:
o Split the dataset into training and validation subsets using a specified ratio.
 Validation Data:
o Held out from the training dataset by the same split (10%); the separate test dataset gets its own loader.
 Batch Size:
o Set batch size to 5 for both training and validation loaders.

Code Snippet
python
Copy code
# `...` stands for the dataset and loader options that are elided on this slide.
train_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(..., collate_fn=collate_fn)

Slide 7: Model Definition


Faster R-CNN Model

 Pre-trained:
o Initialize Faster R-CNN with a ResNet50 backbone pre-trained on COCO
dataset.
 Customization:
o Modify the classifier head to match the number of classes in our fruit dataset.

Code Snippet
python
Copy code
def get_object_detection_model(num_classes):
    """Build a Faster R-CNN (ResNet50-FPN) whose box head predicts ``num_classes``.

    Args:
        num_classes: Number of output classes for the replacement predictor.

    Returns:
        The model with ``roi_heads.box_predictor`` swapped for a fresh
        ``FastRCNNPredictor``.
    """
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in
    # favour of `weights=` -- confirm against the installed version.
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    # Reuse the input width of the existing classification layer.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features,
                                                      num_classes)
    return model

Slide 8: Training Loop


Training Procedure

 Epochs:
o Train the model for a specified number of epochs.
 Loss Calculation:
o Compute the total loss by summing individual loss components provided by
the model.
 Optimizer:
o Use Stochastic Gradient Descent (SGD) with momentum and weight decay for
optimization.

Code Snippet
python
Copy code
def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                num_epoch=10, device='cuda'):
    """Iterate over ``num_epoch`` epochs and return ``model``.

    The per-epoch training and validation work is elided (``...``) on this
    slide, so the remaining parameters are unused in this snippet.
    """
    for epoch in range(1, num_epoch + 1):
        ...
    return model

Slide 9: Evaluation Metrics


Mean Average Precision (mAP)

 Metric:
o Mean Average Precision (mAP) is a common evaluation metric for object
detection models.
o Measures the accuracy of bounding box predictions and class labels.
 Implementation:
o Use the torchmetrics library to compute mAP during validation.

Code Snippet
python
Copy code
import torchmetrics
# `import torchmetrics` alone does not bind MeanAveragePrecision; import the
# class by name so the next line does not raise NameError.
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()

Slide 10: Training Results


Loss and mAP

 Loss:
o Track and display the training loss over each epoch.
 mAP:
o Calculate and display the mean Average Precision on the validation set.

Example Output
plaintext
Copy code
[Epoch 10 / 30] Train loss: 0.123 [300 secs] | Val mAP: 0.78

Slide 11: Visualization


Plotting Bounding Boxes

 Function:
o Define a function plot_img_bbox to visualize bounding boxes on images.
 Visualization:
o Display images with bounding boxes drawn around detected objects.
Code Snippet
python
Copy code
def plot_img_bbox(img, target):
    """Placeholder for the bounding-box plotting helper (body elided on the slide)."""
    ...

Slide 12: Applying Thresholds


Filtering Predictions

 Non-Maximum Suppression (NMS):


o Filter overlapping bounding boxes based on their confidence scores to retain
the best detections.

Code Snippet
python
Copy code
def apply_threshold(prediction, threshold=0.6):
    """Placeholder for filtering a single prediction (body elided on the slide)."""
    ...


def apply_threshold_batch(prediction, threshold=0.1):
    """Placeholder for filtering a list of predictions (body elided on the slide)."""
    ...

Slide 13: Example Predictions


Sample Output

 Original Predictions:
o Predictions before applying the thresholding and NMS.
 Filtered Predictions:
o Predictions after applying thresholding and NMS to filter out lower confidence
detections.

Visualization

 Show images with bounding boxes drawn around detected objects, highlighting the
difference between original and filtered predictions.

Slide 14: Conclusion


Summary

 Model Training:
o Successfully trained an object detection model for identifying apples, bananas,
and oranges.
 Evaluation:
o Evaluated the model using mAP, showing reasonable performance.
 Visualization:
o Demonstrated the effectiveness of the model through visual examples.

Next Steps

 Fine-Tuning:
o Further fine-tune the model to improve accuracy.
 Augmentation:
o Explore additional data augmentation techniques to enhance model robustness.

Slide 15: Q&A


Questions and Answers

 Open Floor:
o Invite the audience to ask questions and provide clarifications on any part of
the presentation.
 Discussion:
o Engage in discussions about potential improvements, use cases, and future
directions for the project.

Feel free to modify and add more specific details, code snippets, and visual aids such as
charts and graphs to enhance the presentation.

4o
import os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch.__version__)


class FruitImagesDataset(torch.utils.data.Dataset):
    """Fruit detection dataset read from a directory of images and XML files.

    Every ``.jpg`` file in ``files_dir`` is one sample; its annotation is the
    file with the same stem and an ``.xml`` extension, containing ``object``
    elements with a ``name`` and a ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``).

    Each item is ``(image, target)``: the image is converted to RGB, resized
    to ``width`` x ``height`` and scaled to ``[0, 1]``; ``target`` is a dict
    with ``boxes`` (rescaled to the resized image), ``labels``, ``area``,
    ``iscrowd`` and ``image_id``.
    """

    def __init__(self, files_dir, width, height, transforms=None):
        """Index the images found in ``files_dir``.

        Args:
            files_dir: Directory holding the ``.jpg`` images and ``.xml`` files.
            width: Width every image is resized to.
            height: Height every image is resized to.
            transforms: Optional callable invoked as
                ``transforms(image=..., bboxes=..., labels=...)`` that returns
                a mapping with ``image`` and ``bboxes`` entries.
        """
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width
        # Sorted so that an index always maps to the same file.
        self.imgs = [name for name in sorted(os.listdir(files_dir))
                     if name.endswith('.jpg')]
        # Index 0 is an explicit placeholder so the fruit classes get ids 1..3
        # (torchvision detection models reserve label 0 for the background).
        self.classes = ['__background__', 'apple', 'banana', 'orange']

    def __getitem__(self, idx):
        """Load, resize and annotate the sample at position ``idx``.

        Returns:
            ``(image, target)`` as described in the class docstring.

        Raises:
            FileNotFoundError: If the image file cannot be read.
            ValueError: If an annotation names a class missing from
                ``self.classes`` (raised by ``list.index``).
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f"could not read image: {image_path}")
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The interpolation flag has to be a keyword argument: the third
        # positional parameter of cv2.resize is the output array `dst`.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        annot_file_path = os.path.join(self.files_dir, img_name[:-4] + '.xml')
        root = et.parse(annot_file_path).getroot()

        # Original image size, needed to rescale boxes to the resized image.
        ht, wt = img.shape[:2]

        boxes = []
        labels = []
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            boxes.append([
                (xmin / wt) * self.width,
                (ymin / ht) * self.height,
                (xmax / wt) * self.width,
                (ymax / ht) * self.height,
            ])

        # reshape keeps the (N, 4) layout even when the image has no objects,
        # so the column indexing below does not fail on an empty tensor.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((boxes.shape[0],), dtype=torch.int64),
            "image_id": torch.tensor([idx]),
        }

        if self.transforms:
            sample = self.transforms(image=img_res,
                                     bboxes=target['boxes'],
                                     labels=labels)
            img_res = sample['image']
            target['boxes'] = torch.as_tensor(
                sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

        return img_res, target

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``files_dir``."""
        return len(self.imgs)
import matplotlib.patches as patches def plot_img_bbox(img, target): fig, a =
plt.subplots(1,1) fig.set_size_inches(5,5) if isinstance(img, torch.Tensor): img =
transforms.ToPILImage()(img).convert('RGB') a.imshow(img) boxes = target['boxes'] if
isinstance(boxes, torch.Tensor): boxes = boxes.tolist() for box in (boxes): x, y, width, height
= box[0], box[1], box[2]-box[0], box[3]-box[1] rect = patches.Rectangle((x, y), width, height,
linewidth = 2, edgecolor = 'r', facecolor = 'none') a.add_patch(rect) plt.show() files_dir =
'/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') files_dir
= '/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') from
torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn from
torchvision.models.detection.faster_rcnn import FastRCNNPredictor from
torchvision.models.detection.mask_rcnn import MaskRCNNPredictor from torchvision
import transforms def get_object_detection_model(num_classes): model =
fasterrcnn_resnet50_fpn(pretrained=True) in_features =
model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor =
FastRCNNPredictor(in_features, num_classes) return model def collate_fn(batch): return
tuple(zip(*batch)) def get_transform(train): if train: return
A.Compose([ A.HorizontalFlip(0.5), A.VerticalFlip(0.3), ToTensorV2(p=1.0) ],
bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) else: return
A.Compose([ ToTensorV2(p=1.0) ], bbox_params={'format': 'pascal_voc', 'label_fields':
['labels']}) dataset = FruitImagesDataset(files_dir, 224, 224,
transforms=get_transform(train=True)) dataset_test = FruitImagesDataset(test_dir, 224, 224,
transforms=get_transform(train=False)) torch.manual_seed(1) np.random.seed(1) indices =
np.random.permutation(len(dataset)).tolist() test_split = 0.1 tsize = len(dataset) -
int(len(dataset)*test_split) train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:]) train_loader =
torch.utils.data.DataLoader( train_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) val_loader = torch.utils.data.DataLoader( val_data, batch_size=5,
shuffle=True, num_workers=2, collate_fn=collate_fn) test_loader =
torch.utils.data.DataLoader( dataset_test, batch_size=5, shuffle=False, num_workers=2,
collate_fn=collate_fn) n_batches, n_batches_test = len(train_loader), len(test_loader) images,
targets = next(iter(train_loader)) print(targets[0]) plot_img_bbox(images[0], targets[0])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') num_classes
= 4 model = get_object_detection_model(num_classes) model.to(device) params = [p for p in
model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) import time !pip
install torchmetrics from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision() def train_model(model, data_loader=None,
num_epoch=10): for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 # loss_mask_accum = 0.0
loss_classifier_accum = 0.0 model.train() for batch_idx, (images, targets) in
enumerate(data_loader, 1): # Predict images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets] loss_dict = model(images,
targets) loss = sum(loss for loss in loss_dict.values()) # Backprop optimizer.zero_grad()
loss.backward() optimizer.step() # Logging # loss_mask = loss_dict['loss_mask'].item()
loss_accum += loss.item() # loss_mask_accum += loss_mask loss_classifier_accum +=
loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch {batch_idx:3d} /
{n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() # Train losses
train_loss = loss_accum / n_batches # train_loss_mask = loss_mask_accum / n_batches
train_loss_classifier = loss_classifier_accum / n_batches elapsed = time.time() - time_start
torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin") prefix = f"[Epoch {epoch:2d}
/ {num_epoch:2d}]" # print(prefix) # print(f"{prefix} Train mask-only loss:
{train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}") print(f"{prefix} Train
loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') preds_single = [] targets_single = [] for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) model.eval() with torch.no_grad(): pred = model(images)
preds_single.extend(pred) metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model def
train_model(model, train_loader, num_epochs): # Define your training loop here for epoch in
range(num_epochs): # Training steps for batch_idx, (data, target) in enumerate(train_loader):
# Forward pass output = model(data) # Compute loss loss = compute_loss(output, target) #
Backward pass and optimization optimizer.zero_grad() loss.backward() optimizer.step() #
Print training progress if batch_idx % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss:
{:.4f}' .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), loss.item())) return
model pip install torchmetrics import time import torch from torchmetrics.detection.mean_ap
import MeanAveragePrecision metric = MeanAveragePrecision() def train_model(model,
optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'): n_batches
= len(train_loader) for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 loss_classifier_accum = 0.0
model.train() for batch_idx, (images, targets) in enumerate(train_loader, 1): # Move data to
device images = list(image.to(device) for image in images) targets = [{k: v.to(device) for k, v
in t.items()} for t in targets] # Forward pass loss_dict = model(images, targets) loss =
sum(loss for loss in loss_dict.values()) # Backpropagation optimizer.zero_grad()
loss.backward() optimizer.step() # Logging loss_accum += loss.item() loss_classifier_accum
+= loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch
{batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() #
Compute average training loss train_loss = loss_accum / n_batches train_loss_classifier =
loss_classifier_accum / n_batches elapsed = time.time() - time_start print(f"[Epoch
{epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') #
Validation preds_single = [] targets_single = [] model.eval() with torch.no_grad(): for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) pred = model(images) preds_single.extend(pred) # Update
metric and compute mAP metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model # Instantiate your
model num_classes = 4 # Modify based on your dataset model =
get_object_detection_model(num_classes) model.to(device) # Define optimizer and
scheduler params = [p for p in model.parameters() if p.requires_grad] optimizer =
torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # Provide DataLoader
for training and validation train_loader = torch.utils.data.DataLoader(train_data,
batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn) val_loader =
torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) # Train the model num_epoch = 30 # Modify as needed model =
train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)
images, targets = next(iter(test_loader)) print(targets[0]) plot_img_bbox(images[0],
targets[0]) import torchvision def apply_threshold(prediction, threshold=0.6): index =
torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold) pred =
prediction.copy() pred['boxes'] = pred['boxes'][index] pred['scores'] = pred['scores'][index]
pred['labels'] = pred['labels'][index] return pred def apply_threshold_batch(prediction,
threshold=0.1): preds = [] for data in prediction: preds.append(apply_threshold(data,
threshold)) return preds model.to(device) model.eval() with torch.no_grad(): prediction =
model([images[0].to(device)]) print(prediction) filtered_preds =
apply_threshold_batch(prediction, threshold=0.1) filtered_preds import torch def
apply_threshold_batch(predictions, threshold=0.5): filtered_preds = [] for pred in predictions:
boxes = pred['boxes'] labels = pred['labels'] scores = pred['scores'] # Apply thresholding mask
= scores > threshold filtered_boxes = boxes[mask] filtered_labels = labels[mask]
filtered_scores = scores[mask] # Construct filtered prediction dictionary filtered_pred =
{ 'boxes': filtered_boxes, 'labels': filtered_labels, 'scores': filtered_scores }
filtered_preds.append(filtered_pred) return filtered_preds # Example prediction prediction =
[{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]), 'labels': torch.tensor([1,
2]), 'scores': torch.tensor([0.8, 0.6])}] # Apply threshold filtered_preds =
apply_threshold_batch(prediction, threshold=0.7) print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0]) import torchvision def
apply_threshold(prediction, threshold=0.6): index = torchvision.ops.nms(prediction['boxes'],
prediction['scores'], threshold) pred = prediction.copy() pred['boxes'] = pred['boxes'][index]
pred['scores'] = pred['scores'][index] pred['labels'] = pred['labels'][index] return pred def
apply_threshold_batch(prediction, threshold=0.1): preds = [] for data in prediction:
preds.append(apply_threshold(data, threshold)) return preds explain the topic through this
code
ChatGPT

Certainly! Here’s a detailed explanation of the provided code, covering the main aspects of
the fruit object detection project using PyTorch and Faster R-CNN:

Overview
This code implements a fruit object detection model using PyTorch and Faster R-CNN. The
primary goal is to detect and classify fruits (apples, bananas, and oranges) in images.

Import Libraries
First, essential libraries are imported:
python
Copy code
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from xml.etree import ElementTree as et
import torch
import torchvision
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
%matplotlib inline
print(torch.__version__)

• os: File handling.
• numpy: Array operations.
• matplotlib: Plotting images.
• cv2: Image processing.
• ElementTree: Parsing XML files.
• torch, torchvision: PyTorch and its vision library.
• albumentations: Data augmentation.
• ToTensorV2: Convert image to tensor.

Dataset Class
The FruitImagesDataset class handles loading and preprocessing of the dataset:

python
Copy code
class FruitImagesDataset(torch.utils.data.Dataset):
    """Pascal VOC-style fruit detection dataset read from a single directory.

    Every ``.jpg`` file in ``files_dir`` is paired with an ``.xml`` annotation
    of the same base name. Images are converted to RGB float32 in [0, 1] and
    resized to ``width`` x ``height``; boxes are rescaled to the resized image.
    """

    def __init__(self, files_dir, width, height, transforms=None):
        """Index the ``.jpg`` files in ``files_dir``.

        Args:
            files_dir: Directory holding the images and their XML annotations.
            width: Target image width in pixels.
            height: Target image height in pixels.
            transforms: Optional callable invoked as
                ``transforms(image=..., bboxes=..., labels=...)`` that returns
                a mapping with at least ``'image'`` and ``'bboxes'``.
        """
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width
        # Sorted so that an index always maps to the same file.
        self.imgs = [name for name in sorted(os.listdir(files_dir))
                     if name.endswith('.jpg')]
        # Index 0 is a placeholder so the fruit labels start at 1.
        self.classes = ['_', 'apple', 'banana', 'orange']

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``.

        ``target`` is a dict with ``boxes`` (N x 4, ``[xmin, ymin, xmax, ymax]``
        in resized-image pixels), ``labels``, ``area``, ``iscrowd`` and
        ``image_id``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The third positional parameter of cv2.resize is `dst`, so the
        # interpolation flag has to be passed by keyword to take effect.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        annot_filename = os.path.splitext(img_name)[0] + '.xml'
        annot_file_path = os.path.join(self.files_dir, annot_filename)
        root = et.parse(annot_file_path).getroot()

        # Original image size, needed to rescale the annotated boxes.
        ht, wt = img.shape[0], img.shape[1]
        boxes = []
        labels = []
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))
            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)
            boxes.append([
                (xmin / wt) * self.width,
                (ymin / ht) * self.height,
                (xmax / wt) * self.width,
                (ymax / ht) * self.height,
            ])

        if self.transforms:
            sample = self.transforms(image=img_res, bboxes=boxes, labels=labels)
            img_res = sample['image']
            boxes = sample['bboxes']
            # Keep labels aligned with whatever boxes the transform returned.
            if 'labels' in sample:
                labels = sample['labels']

        # reshape(-1, 4) keeps an image without objects as a (0, 4) tensor
        # instead of a 1-D empty tensor that breaks the column indexing below.
        boxes = torch.as_tensor(
            np.asarray(boxes, dtype=np.float32)).reshape(-1, 4)
        labels = torch.as_tensor(np.asarray(labels, dtype=np.int64))

        target = {
            "boxes": boxes,
            "labels": labels,
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((boxes.shape[0],), dtype=torch.int64),
            "image_id": torch.tensor([idx]),
        }
        return img_res, target

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.imgs)

Methods:

• __init__: Initializes the dataset with the image directory, target dimensions, and
transformations.
• __getitem__: Loads the image at a given index, resizes and normalizes it, parses its XML
annotation, and returns the image together with its target dictionary.
• __len__: Returns the total number of images in the dataset.

Data Visualization
A function to plot images with bounding boxes:

python
Copy code
import matplotlib.patches as patches

def plot_img_bbox(img, target):
    """Display ``img`` with every box in ``target['boxes']`` outlined in red.

    Args:
        img: Image to show; a ``torch.Tensor`` is first converted to an RGB
            PIL image.
        target: Mapping whose ``'boxes'`` entry holds
            ``[xmin, ymin, xmax, ymax]`` rows (tensor or nested list).
    """
    fig, axis = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    if isinstance(img, torch.Tensor):
        img = transforms.ToPILImage()(img).convert('RGB')
    axis.imshow(img)

    box_rows = target['boxes']
    if isinstance(box_rows, torch.Tensor):
        box_rows = box_rows.tolist()

    for corners in box_rows:
        left, top = corners[0], corners[1]
        # Rectangle takes an anchor corner plus width/height, not two corners.
        outline = patches.Rectangle(
            (left, top),
            corners[2] - left,
            corners[3] - top,
            linewidth=2,
            edgecolor='r',
            facecolor='none',
        )
        axis.add_patch(outline)
    plt.show()

Dataset Initialization
python
Copy code
# Directories holding the training and test images with their XML annotations.
# NOTE(review): the /content/drive prefix presumably is a mounted Google Drive in Colab — confirm.
files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'
# Training dataset with width=224 and height=224; no transforms are passed here.
dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

• Defines the training and test directories and initializes the training dataset with images resized to 224×224.

Faster R-CNN Model


Function to get a Faster R-CNN model with the desired number of classes:

python
Copy code
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_object_detection_model(num_classes):
    """Return a pretrained Faster R-CNN (ResNet-50 FPN) with a new box head.

    Args:
        num_classes: Number of classes the replacement ``FastRCNNPredictor``
            should output.

    Returns:
        The detection model with ``roi_heads.box_predictor`` replaced.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=True)
    # Reuse the input width of the existing classification layer so the new
    # head plugs into the same ROI features.
    head_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    new_head = FastRCNNPredictor(head_inputs, num_classes)
    detector.roi_heads.box_predictor = new_head
    return detector

Data Augmentation and Transformation


Transforms for training and validation datasets:

python
Copy code
def get_transform(train):
    """Build the albumentations pipeline for training or evaluation.

    Args:
        train: When truthy, random horizontal (p=0.5) and vertical (p=0.3)
            flips are applied before tensor conversion; otherwise the image is
            only converted to a tensor.

    Returns:
        An ``A.Compose`` that also transforms Pascal VOC boxes and keeps the
        ``labels`` field aligned with them.
    """
    steps = []
    if train:
        # Pass the probability by keyword: on albumentations 1.x the first
        # positional parameter is `always_apply`, so `HorizontalFlip(0.5)`
        # would flip every image instead of half of them.
        steps.append(A.HorizontalFlip(p=0.5))
        steps.append(A.VerticalFlip(p=0.3))
    steps.append(ToTensorV2(p=1.0))
    return A.Compose(
        steps,
        bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']},
    )

Data Loader
Creating data loaders for training and validation:

python
Copy code
# Seed both RNGs so the shuffled split below is reproducible.
torch.manual_seed(1)
np.random.seed(1)
indices = np.random.permutation(len(dataset)).tolist()
# Hold out the last 10% of the shuffled indices for validation.
test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)
train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:])
# NOTE(review): train_data and val_data wrap the same `dataset` object, so both
# subsets go through whatever transforms that dataset was built with.
# NOTE(review): collate_fn and dataset_test are not defined in this snippet;
# they must already exist when this code runs.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=5,
shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=5,
shuffle=True, num_workers=2, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=5,
shuffle=False, num_workers=2, collate_fn=collate_fn)

Training the Model


Training loop for the model:

python
Copy code
import time
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()

def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,


num_epoch=10, device='cuda'):
n_batches = len(train_loader)
for epoch in range(1, num_epoch + 1):
print(f"Starting epoch {epoch} of {num_epoch}")
time_start = time.time()
loss_accum = 0.0
loss_classifier_accum = 0.0
model.train()

for batch_idx, (images, targets) in enumerate(train_loader, 1):


images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in
targets]
loss_dict = model(images, targets)
loss = sum(loss for loss in loss_dict.values())
optimizer.zero_grad()
loss.backward()
optimizer.step()
loss_accum += loss.item()
loss_classifier_accum += loss_dict['loss_classifier'].item()

if batch_idx % 500 == 0:
print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch
train loss: {loss.item():7.3f}.")

lr_scheduler.step()
train_loss = loss_accum / n_batches
train_loss_classifier = loss_classifier_accum / n_batches
elapsed = time.time() - time_start
print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss:
{train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

preds_single = []
targets_single = []
model.eval()
with torch.no_grad():
for batch_idx, (images, targets) in enumerate(val_loader, 1):
images = list(image.to(device)
4o
mport os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch._version_) class FruitImagesDataset(torch.utils.data.Dataset): def _init_(self,
files_dir, width, height, transforms=None): self.transforms = transforms self.files_dir =
files_dir self.height = height self.width = width self.imgs = [image for image in
sorted(os.listdir(files_dir)) if image[-4:]=='.jpg'] self.classes = [_, 'apple','banana','orange'] def
_getitem_(self, idx): img_name = self.imgs[idx] image_path = os.path.join(self.files_dir,
img_name) img = cv2.imread(image_path) img_rgb = cv2.cvtColor(img,
cv2.COLOR_BGR2RGB).astype(np.float32) img_res = cv2.resize(img_rgb, (self.width,
self.height), cv2.INTER_AREA) img_res /= 255.0 annot_filename = img_name[:-4] + '.xml'
annot_file_path = os.path.join(self.files_dir, annot_filename) boxes = [] labels = [] tree =
et.parse(annot_file_path) root = tree.getroot() wt = img.shape[1] ht = img.shape[0] for
member in root.findall('object'): labels.append(self.classes.index(member.find('name').text))
xmin = int(member.find('bndbox').find('xmin').text) xmax =
int(member.find('bndbox').find('xmax').text) ymin =
int(member.find('bndbox').find('ymin').text) ymax =
int(member.find('bndbox').find('ymax').text) xmin_corr = (xmin/wt)*self.width xmax_corr =
(xmax/wt)*self.width ymin_corr = (ymin/ht)*self.height ymax_corr = (ymax/ht)*self.height
boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr]) boxes =
torch.as_tensor(boxes, dtype=torch.float32) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] -
boxes[:, 0]) iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64) labels =
torch.as_tensor(labels, dtype=torch.int64) target = {} target["boxes"] = boxes target["labels"]
= labels target["area"] = area target["iscrowd"] = iscrowd image_id = torch.tensor([idx])
target["image_id"] = image_id if self.transforms: sample = self.transforms(image = img_res,
bboxes = target['boxes'], labels = labels) img_res = sample['image'] target['boxes'] =
torch.Tensor(sample['bboxes']) return img_res, target def _len_(self): return len(self.imgs)
import matplotlib.patches as patches def plot_img_bbox(img, target): fig, a =
plt.subplots(1,1) fig.set_size_inches(5,5) if isinstance(img, torch.Tensor): img =
transforms.ToPILImage()(img).convert('RGB') a.imshow(img) boxes = target['boxes'] if
isinstance(boxes, torch.Tensor): boxes = boxes.tolist() for box in (boxes): x, y, width, height
= box[0], box[1], box[2]-box[0], box[3]-box[1] rect = patches.Rectangle((x, y), width, height,
linewidth = 2, edgecolor = 'r', facecolor = 'none') a.add_patch(rect) plt.show() files_dir =
'/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') files_dir
= '/content/drive/MyDrive/fruit/train' test_dir = '/content/drive/MyDrive/fruit/test' dataset =
FruitImagesDataset(files_dir, 224, 224) print('length of dataset = ', len(dataset), '\n') from
torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn from
torchvision.models.detection.faster_rcnn import FastRCNNPredictor from
torchvision.models.detection.mask_rcnn import MaskRCNNPredictor from torchvision
import transforms def get_object_detection_model(num_classes): model =
fasterrcnn_resnet50_fpn(pretrained=True) in_features =
model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor =
FastRCNNPredictor(in_features, num_classes) return model def collate_fn(batch): return
tuple(zip(*batch)) def get_transform(train): if train: return
A.Compose([ A.HorizontalFlip(0.5), A.VerticalFlip(0.3), ToTensorV2(p=1.0) ],
bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) else: return
A.Compose([ ToTensorV2(p=1.0) ], bbox_params={'format': 'pascal_voc', 'label_fields':
['labels']}) dataset = FruitImagesDataset(files_dir, 224, 224,
transforms=get_transform(train=True)) dataset_test = FruitImagesDataset(test_dir, 224, 224,
transforms=get_transform(train=False)) torch.manual_seed(1) np.random.seed(1) indices =
np.random.permutation(len(dataset)).tolist() test_split = 0.1 tsize = len(dataset) -
int(len(dataset)*test_split) train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset, indices[tsize:]) train_loader =
torch.utils.data.DataLoader( train_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) val_loader = torch.utils.data.DataLoader( val_data, batch_size=5,
shuffle=True, num_workers=2, collate_fn=collate_fn) test_loader =
torch.utils.data.DataLoader( dataset_test, batch_size=5, shuffle=False, num_workers=2,
collate_fn=collate_fn) n_batches, n_batches_test = len(train_loader), len(test_loader) images,
targets = next(iter(train_loader)) print(targets[0]) plot_img_bbox(images[0], targets[0])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') num_classes
= 4 model = get_object_detection_model(num_classes) model.to(device) params = [p for p in
model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) import time !pip
install torchmetrics from torchmetrics.detection.mean_ap import MeanAveragePrecision
metric = MeanAveragePrecision() def train_model(model, data_loader=None,
num_epoch=10): for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 # loss_mask_accum = 0.0
loss_classifier_accum = 0.0 model.train() for batch_idx, (images, targets) in
enumerate(data_loader, 1): # Predict images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets] loss_dict = model(images,
targets) loss = sum(loss for loss in loss_dict.values()) # Backprop optimizer.zero_grad()
loss.backward() optimizer.step() # Logging # loss_mask = loss_dict['loss_mask'].item()
loss_accum += loss.item() # loss_mask_accum += loss_mask loss_classifier_accum +=
loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch {batch_idx:3d} /
{n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() # Train losses
train_loss = loss_accum / n_batches # train_loss_mask = loss_mask_accum / n_batches
train_loss_classifier = loss_classifier_accum / n_batches elapsed = time.time() - time_start
torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin") prefix = f"[Epoch {epoch:2d}
/ {num_epoch:2d}]" # print(prefix) # print(f"{prefix} Train mask-only loss:
{train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}") print(f"{prefix} Train
loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') preds_single = [] targets_single = [] for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) model.eval() with torch.no_grad(): pred = model(images)
preds_single.extend(pred) metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model def
train_model(model, train_loader, num_epochs): # Define your training loop here for epoch in
range(num_epochs): # Training steps for batch_idx, (data, target) in enumerate(train_loader):
# Forward pass output = model(data) # Compute loss loss = compute_loss(output, target) #
Backward pass and optimization optimizer.zero_grad() loss.backward() optimizer.step() #
Print training progress if batch_idx % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss:
{:.4f}' .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), loss.item())) return
model pip install torchmetrics import time import torch from torchmetrics.detection.mean_ap
import MeanAveragePrecision metric = MeanAveragePrecision() def train_model(model,
optimizer, lr_scheduler, train_loader, val_loader, num_epoch=10, device='cuda'): n_batches
= len(train_loader) for epoch in range(1, num_epoch + 1): print(f"Starting epoch {epoch} of
{num_epoch}") time_start = time.time() loss_accum = 0.0 loss_classifier_accum = 0.0
model.train() for batch_idx, (images, targets) in enumerate(train_loader, 1): # Move data to
device images = list(image.to(device) for image in images) targets = [{k: v.to(device) for k, v
in t.items()} for t in targets] # Forward pass loss_dict = model(images, targets) loss =
sum(loss for loss in loss_dict.values()) # Backpropagation optimizer.zero_grad()
loss.backward() optimizer.step() # Logging loss_accum += loss.item() loss_classifier_accum
+= loss_dict['loss_classifier'].item() if batch_idx % 500 == 0: print(f" [Batch
{batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.") lr_scheduler.step() #
Compute average training loss train_loss = loss_accum / n_batches train_loss_classifier =
loss_classifier_accum / n_batches elapsed = time.time() - time_start print(f"[Epoch
{epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ') #
Validation preds_single = [] targets_single = [] model.eval() with torch.no_grad(): for
batch_idx, (images, targets) in enumerate(val_loader, 1): images = list(image.to(device) for
image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
targets_single.extend(targets) pred = model(images) preds_single.extend(pred) # Update
metric and compute mAP metric.update(preds_single, targets_single) batch_map =
metric.compute() print(f"Val mAP: {batch_map['map']}") return model # Instantiate your
model num_classes = 4 # Modify based on your dataset model =
get_object_detection_model(num_classes) model.to(device) # Define optimizer and
scheduler params = [p for p in model.parameters() if p.requires_grad] optimizer =
torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) lr_scheduler =
torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # Provide DataLoader
for training and validation train_loader = torch.utils.data.DataLoader(train_data,
batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn) val_loader =
torch.utils.data.DataLoader(val_data, batch_size=5, shuffle=True, num_workers=2,
collate_fn=collate_fn) # Train the model num_epoch = 30 # Modify as needed model =
train_model(model, optimizer, lr_scheduler, train_loader, val_loader, num_epoch, device)
images, targets = next(iter(test_loader)) print(targets[0]) plot_img_bbox(images[0],
targets[0]) import torchvision def apply_threshold(prediction, threshold=0.6): index =
torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold) pred =
prediction.copy() pred['boxes'] = pred['boxes'][index] pred['scores'] = pred['scores'][index]
pred['labels'] = pred['labels'][index] return pred def apply_threshold_batch(prediction,
threshold=0.1): preds = [] for data in prediction: preds.append(apply_threshold(data,
threshold)) return preds model.to(device) model.eval() with torch.no_grad(): prediction =
model([images[0].to(device)]) print(prediction) filtered_preds =
apply_threshold_batch(prediction, threshold=0.1) filtered_preds import torch def
apply_threshold_batch(predictions, threshold=0.5): filtered_preds = [] for pred in predictions:
boxes = pred['boxes'] labels = pred['labels'] scores = pred['scores'] # Apply thresholding mask
= scores > threshold filtered_boxes = boxes[mask] filtered_labels = labels[mask]
filtered_scores = scores[mask] # Construct filtered prediction dictionary filtered_pred =
{ 'boxes': filtered_boxes, 'labels': filtered_labels, 'scores': filtered_scores }
filtered_preds.append(filtered_pred) return filtered_preds # Example prediction prediction =
[{'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]), 'labels': torch.tensor([1,
2]), 'scores': torch.tensor([0.8, 0.6])}] # Apply threshold filtered_preds =
apply_threshold_batch(prediction, threshold=0.7) print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0]) import torchvision def
apply_threshold(prediction, threshold=0.6): index = torchvision.ops.nms(prediction['boxes'],
prediction['scores'], threshold) pred = prediction.copy() pred['boxes'] = pred['boxes'][index]
pred['scores'] = pred['scores'][index] pred['labels'] = pred['labels'][index] return pred def
apply_threshold_batch(prediction, threshold=0.1): preds = [] for data in prediction:
preds.append(apply_threshold(data, threshold)) return preds prepare ppt presentation for this
ChatGPT

To prepare a PowerPoint presentation for this code, you'll want to follow a structured
approach to present the key components and processes involved. Here's a suggested outline
for your presentation:

Slide 1: Title Slide

• Title: Object Detection with PyTorch
• Subtitle: Using Faster R-CNN and Mean Average Precision
• Your Name
• Date

Slide 2: Introduction

• Brief overview of object detection
• Importance in computer vision tasks
• Introduction to PyTorch for deep learning

Slide 3: Problem Statement

• Dataset: Fruit Images Dataset
• Goal: Detect and classify fruits in images
• Challenges: Varying sizes, multiple classes

Slide 4: Dataset and Preprocessing

• Description of the Fruit Images Dataset
• Preprocessing steps:
  ◦ Loading images and annotations
  ◦ Resizing images and bounding boxes
  ◦ Data augmentation

Slide 5: Model Architecture

• Introduction to Faster R-CNN
• Overview of the architecture
• Explanation of key components:
  ◦ Backbone (ResNet50)
  ◦ Region Proposal Network (RPN)
  ◦ Region of Interest (ROI) Pooling
  ◦ Fast R-CNN Head
  ◦ Loss functions

Slide 6: Training Setup

• Splitting dataset into train, validation, and test sets
• Data loaders for efficient training
• Model initialization and optimization:
  ◦ Optimizer: SGD with momentum
  ◦ Learning rate scheduler

Slide 7: Training Process

• Iterative training loop:
  ◦ Forward pass
  ◦ Loss computation
  ◦ Backpropagation
  ◦ Optimization
• Monitoring training loss and validation mAP
• Evaluation metrics: Mean Average Precision (mAP)

Slide 8: Results

• Sample predictions on test images
• Visualizations of detected fruits with bounding boxes
• Discussion on model performance and improvements

Slide 9: Thresholding and Non-Maximum Suppression

• Explanation of thresholding to filter detections
• Non-Maximum Suppression (NMS) for post-processing
• Implementation details in PyTorch

Slide 10: Conclusion

• Recap of key points covered
• Summary of achievements and challenges
• Future directions for improvement

Slide 11: References

• List of relevant papers, libraries, and resources
• Credits for datasets and code snippets used

Slide 12: Q&A


• Open floor for questions and discussions

This outline provides a structured framework for your presentation, covering the main aspects
of your object detection project implemented in PyTorch. You can customize each slide with
relevant content, code snippets, visualizations, and explanations to effectively communicate
your work to your audience.

3.5
Slide 2: Introduction Brief overview of object detection Importance in computer vision tasks
Introduction to PyTorch for deep learning Slide 3: Problem Statement Dataset: Fruit Images
Dataset Goal: Detect and classify fruits in images Challenges: Varying sizes, multiple classes
Slide 4: Dataset and Preprocessing Description of the Fruit Images Dataset Preprocessing
steps: Loading images and annotations Resizing images and bounding boxes Data
augmentation Slide 5: Model Architecture Introduction to Faster R-CNN Overview of the
architecture Explanation of key components: Backbone (ResNet50) Region Proposal
Network (RPN) Region of Interest (ROI) Pooling Fast R-CNN Head Loss functions Slide 6:
Training Setup Splitting dataset into train, validation, and test sets Data loaders for efficient
training Model initialization and optimization: Optimizer: SGD with momentum Learning
rate scheduler Slide 7: Training Process Iterative training loop: Forward pass Loss
computation Backpropagation Optimization Monitoring training loss and validation mAP
Evaluation metrics: Mean Average Precision (mAP) Slide 8: Results Sample predictions on
test images Visualizations of detected fruits with bounding boxes Discussion on model
performance and improvements Slide 9: Thresholding and Non-Maximum Suppression
Explanation of thresholding to filter detections Non-Maximum Suppression (NMS) for post-
processing Implementation details in PyTorch Slide 10: Conclusion Recap of key points
covered Summary of achievements and challenges Future directions for improvement Slide
11: References List of relevant papers, libraries, and resources Credits for datasets and code
snippets used Slide 12: Q&A Open floor for questions and discussions
ChatGPT

Your presentation outline looks great! It provides a clear structure for presenting the key
components of your object detection project implemented in PyTorch. Each slide covers
important aspects, from the introduction to the conclusion, and includes opportunities for
audience engagement during the Q&A session.

If you have any specific content or details you'd like to include on each slide or if you need
assistance with any particular part of the presentation, feel free to let me know!

3.5
mport os import numpy as np import matplotlib.pyplot as plt import cv2 from xml.etree
import ElementTree as et import torch import torchvision import albumentations as A from
albumentations.pytorch.transforms import ToTensorV2 %matplotlib inline
print(torch._version_) class FruitImagesDataset(torch.utils.data.Dataset): def _init_(self,
files_dir, width, height, transforms=None): self.transforms = transforms self.files_dir =
files_dir self.height = height self.width = width self.imgs = [image for image in
sorted(os.listdir(files_dir)) if image[-4:]=='.jpg'] self.classes = [_, 'apple','banana','orange'] def
_getitem_(self, idx): img_name = self.imgs[idx] image_path = os.path.join(self.files_dir,
img_name) img = cv2.imread(image_path) img_rgb = cv2.cvtColor(img,
cv2.COLOR_BGR2RGB).astype(np.float32) img_res = cv2.resize(img_rgb, (self.width,
self.height), cv2.INTER_AREA) img_res /= 255.0 annot_filename = img_name[:-4] + '.xml'
annot_file_path = os.path.join(self.files_dir, annot_filename) boxes = [] labels = [] tree =
et.parse(annot_file_path) root = tree.getroot() wt = img.shape[1] ht = img.shape[0] for
member in root.findall('object'): labels.append(self.classes.index(member.find('name').text))
xmin = int(member.find('bndbox').find('xmin').text) xmax =
int(member.find('bndbox').find('xmax').text) ymin =
int(member.find('bndbox').find('ymin').text) ymax =
int(member.find('bndbox').find('ymax').text) xmin_corr = (xmin/wt)*self.width xmax_corr =
(xmax/wt)*self.width ymin_corr = (ymin/ht)*self.height ymax_corr = (ymax/ht)*self.height
boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr]) boxes =
torch.as_tensor(boxes, dtype=torch.float32) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] -
boxes[:, 0]) iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64) labels =
torch.as_tensor(labels, dtype=torch.int64) target = {} target["boxes"] = boxes target["labels"]
= labels target["area"] = area target["iscrowd"] = iscrowd image_id = torch.tensor([idx])
target["image_id"] = image_id if self.transforms: sample = self.transforms(image = img_res,
bboxes = target['boxes'], labels = labels) img_res = sample['image'] target['boxes'] =
torch.Tensor(sample['bboxes']) return img_res, target def _len_(self): return len(self.imgs)
import matplotlib.patches as patches


def plot_img_bbox(img, target):
    """Show an image with its bounding boxes drawn as red rectangles.

    Args:
        img: Image to display; a ``torch.Tensor`` is first converted to an RGB
            PIL image.
        target: Mapping whose ``'boxes'`` entry holds
            ``[xmin, ymin, xmax, ymax]`` boxes as a tensor or a list.
    """
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    if isinstance(img, torch.Tensor):
        img = transforms.ToPILImage()(img).convert('RGB')
    a.imshow(img)

    boxes = target['boxes']
    if isinstance(boxes, torch.Tensor):
        boxes = boxes.tolist()
    for box in boxes:
        # Rectangle takes the top-left corner plus width/height, not two corners.
        x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
        rect = patches.Rectangle((x, y), width, height,
                                 linewidth=2, edgecolor='r', facecolor='none')
        a.add_patch(rect)
    plt.show()


files_dir = '/content/drive/MyDrive/fruit/train'
test_dir = '/content/drive/MyDrive/fruit/test'

dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision import transforms


def get_object_detection_model(num_classes):
    """Return a pre-trained Faster R-CNN whose box predictor outputs ``num_classes``.

    Args:
        num_classes: Number of output classes of the new box predictor.
    """
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # Swap the stock predictor head for one sized to this dataset.
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


def collate_fn(batch):
    """Turn a list of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
    return tuple(zip(*batch))


def get_transform(train):
    """Build the albumentations pipeline for training or evaluation.

    Args:
        train: When true, random horizontal/vertical flips are added before
            the tensor conversion; otherwise only the tensor conversion runs.
    """
    bbox_params = {'format': 'pascal_voc', 'label_fields': ['labels']}
    if train:
        return A.Compose(
            [
                # Probabilities are passed by keyword: the first positional
                # parameter of these transforms is not `p` in every
                # albumentations release.
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.3),
                ToTensorV2(p=1.0),
            ],
            bbox_params=bbox_params,
        )
    return A.Compose([ToTensorV2(p=1.0)], bbox_params=bbox_params)


dataset = FruitImagesDataset(files_dir, 224, 224,
                             transforms=get_transform(train=True))
# Same files as `dataset`, but without augmentation, for the validation subset.
dataset_val = FruitImagesDataset(files_dir, 224, 224,
                                 transforms=get_transform(train=False))
dataset_test = FruitImagesDataset(test_dir, 224, 224,
                                  transforms=get_transform(train=False))

torch.manual_seed(1)
np.random.seed(1)
indices = np.random.permutation(len(dataset)).tolist()

# Hold out the last 10% of the shuffled indices for validation.
test_split = 0.1
tsize = len(dataset) - int(len(dataset) * test_split)
train_data = torch.utils.data.Subset(dataset, indices[:tsize])
val_data = torch.utils.data.Subset(dataset_val, indices[tsize:])

train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=5, shuffle=True, num_workers=2,
    collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=5, shuffle=True, num_workers=2,
    collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(
    dataset_test, batch_size=5, shuffle=False, num_workers=2,
    collate_fn=collate_fn)

n_batches, n_batches_test = len(train_loader), len(test_loader)

# Sanity check: show the first sample of one training batch.
images, targets = next(iter(train_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 4
model = get_object_detection_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

import time

try:
    from torchmetrics.detection.mean_ap import MeanAveragePrecision
except ImportError:
    # The notebook ran `!pip install torchmetrics` here; installing on demand
    # keeps this cell valid Python outside a notebook as well.
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "torchmetrics"])
    from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()


def _validation_map(model, val_loader, device):
    """Return the metric dict of ``model`` over ``val_loader``.

    Uses the module-level ``metric``. Its state is reset first so the result
    covers only this pass rather than every earlier epoch as well.
    """
    preds_single = []
    targets_single = []
    model.eval()
    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            targets_single.extend(targets)
            preds_single.extend(model(images))
    metric.reset()
    metric.update(preds_single, targets_single)
    return metric.compute()


def train_model(model, data_loader=None, num_epoch=10):
    """First draft of the training loop (shadowed by the later definitions).

    Relies on the module-level ``optimizer``, ``lr_scheduler``, ``val_loader``,
    ``metric`` and ``device``. A checkpoint is saved after every epoch.

    Args:
        model: Detection model that returns a loss dict in training mode.
        data_loader: Loader yielding ``(images, targets)`` batches.
        num_epoch: Number of epochs to run.

    Returns:
        The trained model.
    """
    # Average over the loader actually iterated, not a global batch count.
    n_batches = len(data_loader)
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")
        time_start = time.time()
        loss_accum = 0.0
        model.train()

        for batch_idx, (images, targets) in enumerate(data_loader, 1):
            # Predict
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            loss = sum(loss_dict.values())

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            loss_accum += loss.item()
            if batch_idx % 500 == 0:
                print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")

        lr_scheduler.step()

        # Train losses
        train_loss = loss_accum / max(n_batches, 1)
        elapsed = time.time() - time_start

        torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin")
        prefix = f"[Epoch {epoch:2d} / {num_epoch:2d}]"
        print(f"{prefix} Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

        batch_map = _validation_map(model, val_loader, device)
        print(f"Val mAP: {batch_map['map']}")

    return model


def train_model(model, train_loader, num_epochs):
    """Generic training-loop template (shadowed by the definition below).

    NOTE(review): ``compute_loss`` is not defined anywhere in the visible
    code, and ``optimizer`` is taken from module scope.
    """
    # Define your training loop here
    for epoch in range(num_epochs):
        # Training steps
        for batch_idx, (data, target) in enumerate(train_loader):
            # Forward pass
            output = model(data)
            # Compute loss
            loss = compute_loss(output, target)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Print training progress
            if batch_idx % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, batch_idx + 1,
                              len(train_loader), loss.item()))
    return model


import time
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()


def train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                num_epoch=10, device='cuda'):
    """Train a detection model and report validation mAP after every epoch.

    Args:
        model: Detection model that returns a loss dict in training mode and
            predictions in eval mode.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per epoch.
        train_loader: Loader yielding ``(images, targets)`` training batches.
        val_loader: Loader yielding ``(images, targets)`` validation batches.
        num_epoch: Number of epochs to run.
        device: Device that images and targets are moved to.

    Returns:
        The trained model.
    """
    n_batches = len(train_loader)
    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")
        time_start = time.time()
        loss_accum = 0.0
        model.train()

        for batch_idx, (images, targets) in enumerate(train_loader, 1):
            # Move data to device
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)
            loss = sum(loss_dict.values())

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            loss_accum += loss.item()
            if batch_idx % 500 == 0:
                print(f" [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")

        lr_scheduler.step()

        # Compute average training loss (guarded against an empty loader)
        train_loss = loss_accum / max(n_batches, 1)
        elapsed = time.time() - time_start
        print(f"[Epoch {epoch:2d} / {num_epoch:2d}] Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')

        # Validation
        batch_map = _validation_map(model, val_loader, device)
        print(f"Val mAP: {batch_map['map']}")

    return model


# Instantiate your model
num_classes = 4  # Modify based on your dataset
model = get_object_detection_model(num_classes)
model.to(device)

# Define optimizer and scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Provide DataLoader for training and validation
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=5, shuffle=True, num_workers=2, collate_fn=collate_fn)

# Train the model
num_epoch = 30  # Modify as needed
model = train_model(model, optimizer, lr_scheduler, train_loader, val_loader,
                    num_epoch, device)
# Pull one batch from the test loader and display its first annotated sample.
images, targets = next(iter(test_loader))
print(targets[0])
plot_img_bbox(images[0], targets[0])

import torchvision


def apply_threshold(prediction, threshold=0.6):
    """Return a copy of one prediction dict reduced to the boxes kept by NMS.

    ``threshold`` is handed to ``torchvision.ops.nms`` as its third argument.
    """
    keep = torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold)
    pruned = prediction.copy()
    for field in ('boxes', 'scores', 'labels'):
        pruned[field] = pruned[field][keep]
    return pruned


def apply_threshold_batch(prediction, threshold=0.1):
    """Apply ``apply_threshold`` to every prediction dict of a batch."""
    return [apply_threshold(single, threshold) for single in prediction]


model.to(device)
model.eval()
with torch.no_grad():
    prediction = model([images[0].to(device)])
print(prediction)

filtered_preds = apply_threshold_batch(prediction, threshold=0.1)
filtered_preds

import torch


def apply_threshold_batch(predictions, threshold=0.5):
    """Keep only detections whose score is strictly greater than ``threshold``.

    Replaces the NMS-based function of the same name defined above.

    Args:
        predictions: Iterable of dicts with ``'boxes'``, ``'labels'`` and
            ``'scores'`` tensors.
        threshold: Score cut-off.

    Returns:
        A list with one filtered ``boxes``/``labels``/``scores`` dict per input.
    """
    def _confident_only(pred):
        boxes, labels, scores = pred['boxes'], pred['labels'], pred['scores']
        keep = scores > threshold
        return {'boxes': boxes[keep], 'labels': labels[keep], 'scores': scores[keep]}

    return [_confident_only(pred) for pred in predictions]


# Example prediction
prediction = [{
    'boxes': torch.tensor([[100, 100, 200, 200], [150, 150, 250, 250]]),
    'labels': torch.tensor([1, 2]),
    'scores': torch.tensor([0.8, 0.6]),
}]

# Apply threshold
filtered_preds = apply_threshold_batch(prediction, threshold=0.7)
print(filtered_preds)
plot_img_bbox(images[2], filtered_preds[0])

import torchvision


def apply_threshold(prediction, threshold=0.6):
    """Return a copy of ``prediction`` keeping only the entries selected by NMS.

    ``threshold`` is forwarded as the third argument of ``torchvision.ops.nms``.
    """
    selected = torchvision.ops.nms(prediction['boxes'], prediction['scores'], threshold)
    return {
        **prediction,
        'boxes': prediction['boxes'][selected],
        'scores': prediction['scores'][selected],
        'labels': prediction['labels'][selected],
    }


def apply_threshold_batch(prediction, threshold=0.1):
    """Run ``apply_threshold`` over each prediction dict in ``prediction``."""
    results = []
    results.extend(apply_threshold(item, threshold) for item in prediction)
    return results


# Prompt that followed the code in the original conversation:
# based on this explain what we are doing in this project and concept of this
ChatGPT

In this project, we're implementing an object detection system using PyTorch. The goal is to
detect and classify fruits in images, which involves identifying the presence of fruit instances
and labeling them with their corresponding classes (e.g., apple, banana, orange).

Here's a breakdown of what we're doing:

1. Dataset Preparation: We're using the Fruit Images Dataset, which contains images
of various fruits along with annotations specifying the bounding boxes and class
labels of the fruits in the images. We preprocess the dataset by loading images,
resizing them, and extracting bounding box annotations.
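2. Model Selection: We're using the Faster R-CNN architecture, a popular choice for
object detection tasks. The model used here (torchvision's fasterrcnn_resnet50_fpn)
consists of a ResNet-50 backbone with a Feature Pyramid Network (FPN), a Region
Proposal Network (RPN), and Region of Interest (ROI) heads. Its box predictor is
replaced with a new one sized for our 4 classes (background, apple, banana, orange).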
3. Training Setup: We randomly split the training images into a training subset (90%)
and a validation subset (10%); the test set is loaded from a separate directory. We
define data loaders for efficient training and validation. The model is initialized
with pre-trained weights and optimized using the SGD optimizer with momentum. We also
set up a learning rate scheduler (StepLR) that multiplies the learning rate by 0.1
every 3 epochs.
4. Training Process: We train the model using an iterative training loop. In each
iteration, we perform a forward pass to compute the loss, followed by
backpropagation and optimization. We monitor the training loss and validation mean
Average Precision (mAP) as evaluation metrics.
5. Results Analysis: We analyze the results by visualizing sample predictions on test
images. We discuss the model's performance and potential improvements.
6. Thresholding and Non-Maximum Suppression: We apply thresholding to filter out
detections with low confidence scores and use Non-Maximum Suppression (NMS) for
post-processing to remove redundant bounding boxes.

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy