Task1 Lakshya - Ipynb - Colab
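The message below is the output of mounting Google Drive; the mount call, the library imports, and the DEVICE flag used throughout the notebook are not included in this extract. A minimal sketch of what they presumably look like:
from google.colab import drive
drive.mount('/content/drive')

import os
from time import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'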
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
traindir = '/content/drive/MyDrive/dataset/train/'
testdir = '/content/drive/MyDrive/dataset/test/'
categories = []
img_categories = []
n_train = []
n_test = []
hs = []
ws = []
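The loop that fills these lists is not part of the extract; a sketch under the assumption that PIL is used to read each training image and record its size:
from PIL import Image

for category in sorted(os.listdir(traindir)):
    categories.append(category)
    train_files = os.listdir(os.path.join(traindir, category))
    test_files = os.listdir(os.path.join(testdir, category))
    n_train.append(len(train_files))
    n_test.append(len(test_files))
    for fname in train_files:
        with Image.open(os.path.join(traindir, category, fname)) as img:
            w, h = img.size
        img_categories.append(category)
        hs.append(h)
        ws.append(w)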
image_df = pd.DataFrame({
'category': img_categories,
'height': hs,
'width': ws
})
print(image_df)
unnormalize = transforms.Normalize(
mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
std=[1 / 0.229, 1 / 0.224, 1 / 0.225]
)
def imshow_tensor(tensor, title=None):
    # Undo the ImageNet normalisation, move channels last, and clip for display.
    image_tensor = unnormalize(tensor)
    image = image_tensor.numpy().transpose((1, 2, 0))
    image = image.clip(0, 1)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.axis('off')
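simple_transform and image_aug_transforms are referenced below but not defined in this extract; a plausible sketch, assuming 224x224 resizing and the ImageNet normalization that matches the unnormalize transform above (the specific augmentations are assumptions, not taken from the original notebook):
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

image_aug_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])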
images = {
'train': datasets.ImageFolder(root=traindir, transform=simple_transform),
'test': datasets.ImageFolder(root=testdir, transform=simple_transform)
}
images_aug = {
'train': datasets.ImageFolder(root=traindir, transform=image_aug_transforms),
'test': datasets.ImageFolder(root=testdir, transform=simple_transform)
}
batch_size = 8
dataloaders = {
'train': DataLoader(torch.utils.data.Subset(images['train'], range(0, 140)), batch_size=batch_size, shuffle=True),
'val' : DataLoader(torch.utils.data.Subset(images['train'], range(140, 160)), batch_size=batch_size, shuffle=True),
'test': DataLoader(images['test'], batch_size=batch_size, shuffle=True)
}
aug_dataloaders = {
'train': DataLoader(torch.utils.data.Subset(images_aug['train'], range(0, 140)), batch_size=batch_size, shuffle=True),
'val' : DataLoader(torch.utils.data.Subset(images_aug['train'], range(140, 160)), batch_size=batch_size, shuffle=True),
'test': DataLoader(images_aug['test'], batch_size=batch_size, shuffle=True)
}
train_data_iter = iter(dataloaders['train'])
images_batch, labels = next(train_data_iter)
plt.figure(figsize=(12, 8))
for i in range(6):
plt.subplot(2, 3, i + 1)
imshow_tensor(images_batch[i])
plt.axis('off')
Image batch shape: torch.Size([8, 3, 224, 224])
Label batch shape: torch.Size([8])
Batch labels: tensor([0, 0, 1, 0, 1, 1, 1, 0])
aug_images_batch, aug_labels = next(iter(aug_dataloaders['train']))
plt.figure(figsize=(12, 8))
for i in range(6):
    plt.subplot(2, 3, i + 1)
    imshow_tensor(aug_images_batch[i])
    plt.axis('off')
Augmented image batch shape: torch.Size([8, 3, 224, 224])
Label batch shape: torch.Size([8])
Batch labels (augmented): tensor([0, 0, 1, 0, 0, 1, 0, 1])
category_count = pd.DataFrame({
'Category': [category for category, _ in images['train'].class_to_idx.items()],
'Number of images': [len(os.listdir(os.path.join(traindir, category))) for category in images['train'].class_to_idx]
})
print("Class distribution:")
print(category_count)
Class distribution:
Category Number of images
0 bear 80
1 panda 80
Image size statistics:
Height Width
count 140.0 140.0
mean 224.0 224.0
std 0.0 0.0
min 224.0 224.0
25% 224.0 224.0
50% 224.0 224.0
75% 224.0 224.0
max 224.0 224.0
VGG 1
def define_model():
    model = nn.Sequential(
        nn.Conv2d(3, 32, 3),   # 3 input channels, 32 filters of size 3x3: 224x224 -> 222x222
        nn.ReLU(),
        nn.MaxPool2d(2),       # 222x222 -> 111x111
        nn.Flatten(),
        nn.Linear(32 * 111 * 111, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )
    return model
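The hard-coded flatten size (32 * 111 * 111 = 394272) can be sanity-checked with a dummy forward pass through the convolutional part rather than computed by hand; a small sketch:
with torch.no_grad():
    dummy = torch.zeros(1, 3, 224, 224)
    feats = nn.Sequential(nn.Conv2d(3, 32, 3), nn.ReLU(), nn.MaxPool2d(2))(dummy)
    print(feats.shape)             # torch.Size([1, 32, 111, 111])
    print(feats.flatten(1).shape)  # torch.Size([1, 394272])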
vgg1 = define_model()
if DEVICE == 'cuda':
vgg1.to('cuda')
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 222, 222] 896
ReLU-2 [-1, 32, 222, 222] 0
MaxPool2d-3 [-1, 32, 111, 111] 0
Flatten-4 [-1, 394272] 0
Linear-5 [-1, 128] 50,466,944
ReLU-6 [-1, 128] 0
Linear-7 [-1, 2] 258
================================================================
Total params: 50,468,098
Trainable params: 50,468,098
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 30.08
Params size (MB): 192.52
Estimated Total Size (MB): 223.18
----------------------------------------------------------------
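The layer table above (and the equivalent tables for the later models) was presumably produced with torchsummary; a sketch assuming that package:
from torchsummary import summary
summary(vgg1, input_size=(3, 224, 224))  # torchsummary assumes a CUDA model by default; pass device='cpu' otherwise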
vgg1.class_to_idx = images['train'].class_to_idx
vgg1.idx_to_class = {
idx: class_
for class_, idx in vgg1.class_to_idx.items()
}
list(vgg1.idx_to_class.items())[:10]
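The number printed below is the total parameter count; the expression that produced it is not shown, presumably something along the lines of:
sum(p.numel() for p in vgg1.parameters())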
50468098
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg1.parameters())
n_epochs = 10
train_losses = []
val_losses = []
train_accuracy = []
valid_accuracy = []
training_time = []
writer = SummaryWriter(f'runs/vgg1/')
step = 0
overall_start_time = time()
for epoch in range(n_epochs):
train_loss = 0.0
val_loss = 0.0
train_acc = 0
valid_acc = 0
vgg1.train()
start_time = time()
for data, target in dataloaders['train']:
if DEVICE == 'cuda':
data, target = data.cuda(), target.cuda()
optimizer.zero_grad()
output = vgg1(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
pred = torch.argmax(input = output, dim = 1)
correct_labels = pred.eq(target.data.view_as(pred))
acc = torch.mean(correct_labels.type(torch.FloatTensor))
train_acc += acc.item() * data.size(0)
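The epoch timing and the validation pass are missing from this extract (train_time, val_loss, and valid_acc are used below but never computed); a sketch of the presumably omitted code, assuming the same conventions as the training pass above (the later models' training loops would contain the same additions):
train_time = time() - start_time

vgg1.eval()
with torch.no_grad():
    for data, target in dataloaders['val']:
        if DEVICE == 'cuda':
            data, target = data.cuda(), target.cuda()
        output = vgg1(data)
        loss = criterion(output, target)
        val_loss += loss.item() * data.size(0)
        pred = torch.argmax(output, dim=1)
        correct_labels = pred.eq(target.view_as(pred))
        valid_acc += torch.mean(correct_labels.type(torch.FloatTensor)).item() * data.size(0)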
train_loss = train_loss/len(dataloaders['train'].sampler)
val_loss = val_loss/len(dataloaders['val'].sampler)
train_acc = train_acc / len(dataloaders['train'].sampler)
valid_acc = valid_acc / len(dataloaders['val'].sampler)
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracy.append(train_acc)
valid_accuracy.append(valid_acc)
training_time.append(train_time)
ax[0].legend()
ax[1].legend()
<matplotlib.legend.Legend at 0x79377e26b460>
test_acc = 0
step = 0
test_images = []
test_labels = []
predictions = []
output = vgg1(input)
pred = torch.argmax(input=output, dim=1)
correct_labels = pred.eq(target.data.view_as(pred))
acc = torch.mean(correct_labels.type(torch.FloatTensor))
test_acc += acc.item() * input.size(0)
test_images.append(input.cpu())
test_labels.append(target.cpu())
predictions.append(pred.cpu())
img = input[0].cpu().squeeze().permute(1, 2, 0).numpy()
img = np.clip(img, 0, 1)
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title(f'Predicted: {vgg1.idx_to_class[pred[0].item()]}')
ax.set_axis_off()
step += 1
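The surrounding loop over the test DataLoader, and the post-loop bookkeeping that the plotting code below relies on (stacked tensors, the axes grid), are not in the extract; a sketch of what they presumably look like (the same pattern applies to the other models' test cells):
vgg1.eval()
with torch.no_grad():
    for input, target in dataloaders['test']:
        if DEVICE == 'cuda':
            input, target = input.cuda(), target.cuda()
        # ... per-batch evaluation and logging as shown above ...

test_acc = test_acc / len(dataloaders['test'].sampler)

# Stack the collected batches so they can be indexed per image below.
test_images = torch.cat(test_images)
test_labels = torch.cat(test_labels)
predictions = torch.cat(predictions)
fig, axes = plt.subplots(4, 5, figsize=(15, 12))  # grid size assumed, not from the original
axes = axes.flatten()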
for i, ax in enumerate(axes):
if i < test_images.size(0):
img = test_images[i].squeeze().permute(1, 2, 0).numpy()
img = np.clip(img, 0, 1)
ax.imshow(img)
ax.set_title(f'Pred: {vgg1.idx_to_class[predictions[i].item()]}', fontsize=10)
ax.set_axis_off()
plt.tight_layout()
writer.add_figure('All Test Images & Predictions', fig)
plt.show()
%reload_ext tensorboard
VGG 3
def define_model():
    model = nn.Sequential(
        nn.Conv2d(3, 32, 3),    # 3 input channels, 32 filters of size 3x3: 224x224 -> 222x222
        nn.ReLU(),
        nn.MaxPool2d(2),        # -> 111x111
        nn.Conv2d(32, 64, 3),   # -> 109x109
        nn.ReLU(),
        nn.MaxPool2d(2),        # -> 54x54
        nn.Conv2d(64, 128, 3),  # -> 52x52
        nn.ReLU(),
        nn.MaxPool2d(2),        # -> 26x26
        nn.Flatten(),
        nn.Linear(26 * 26 * 128, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )
    return model
vgg3 = define_model()
if DEVICE == 'cuda':
vgg3.to('cuda')
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 222, 222] 896
ReLU-2 [-1, 32, 222, 222] 0
MaxPool2d-3 [-1, 32, 111, 111] 0
Conv2d-4 [-1, 64, 109, 109] 18,496
ReLU-5 [-1, 64, 109, 109] 0
MaxPool2d-6 [-1, 64, 54, 54] 0
Conv2d-7 [-1, 128, 52, 52] 73,856
ReLU-8 [-1, 128, 52, 52] 0
MaxPool2d-9 [-1, 128, 26, 26] 0
Flatten-10 [-1, 86528] 0
Linear-11 [-1, 128] 11,075,712
ReLU-12 [-1, 128] 0
Linear-13 [-1, 2] 258
================================================================
Total params: 11,169,218
Trainable params: 11,169,218
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 46.70
Params size (MB): 42.61
Estimated Total Size (MB): 89.88
----------------------------------------------------------------
vgg3.class_to_idx = images['train'].class_to_idx
vgg3.idx_to_class = {
idx: class_
for class_, idx in vgg3.class_to_idx.items()
}
list(vgg3.idx_to_class.items())[:10]
11169218
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg3.parameters())
n_epochs = 10
train_losses = []
val_losses = []
train_accuracy = []
valid_accuracy = []
training_time = []
writer = SummaryWriter(f'runs/vgg3/')
step = 0
overall_start_time = time()
for epoch in range(n_epochs):
train_loss = 0.0
val_loss = 0.0
train_acc = 0
valid_acc = 0
vgg3.train()
start_time = time()
for data, target in dataloaders['train']:
    if DEVICE == 'cuda':
        data, target = data.cuda(), target.cuda()
    optimizer.zero_grad()
    output = vgg3(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    train_loss += loss.item() * data.size(0)
    pred = torch.argmax(input=output, dim=1)
    correct_labels = pred.eq(target.data.view_as(pred))
    train_acc += torch.mean(correct_labels.type(torch.FloatTensor)).item() * data.size(0)
    step += 1
train_time = time() - start_time
print(f'Training time for epoch {epoch + 1}: {train_time:.2f}s\n')
train_loss = train_loss/len(dataloaders['train'].sampler)
val_loss = val_loss/len(dataloaders['val'].sampler)
train_acc = train_acc / len(dataloaders['train'].sampler)
valid_acc = valid_acc / len(dataloaders['val'].sampler)
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracy.append(train_acc)
valid_accuracy.append(valid_acc)
training_time.append(train_time)
<matplotlib.legend.Legend at 0x79377f8c2950>
test_acc = 0
step = 0
test_images = []
test_labels = []
predictions = []
output = vgg3(input)
pred = torch.argmax(input=output, dim=1)
correct_labels = pred.eq(target.data.view_as(pred))
acc = torch.mean(correct_labels.type(torch.FloatTensor))
test_acc += acc.item() * input.size(0)
test_images.append(input.cpu())
test_labels.append(target.cpu())
predictions.append(pred.cpu())
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title(f'Predicted: {vgg3.idx_to_class[pred[0].item()]}')
ax.set_axis_off()
writer.add_scalar('Testing Accuracy / Iteration', acc, global_step=step)
writer.add_figure('Test images & Label', fig, global_step=step)
step += 1
for i, ax in enumerate(axes):
if i < test_images.size(0):
img = test_images[i].squeeze().permute(1, 2, 0).numpy()
img = np.clip(img, 0, 1)
ax.imshow(img)
ax.set_title(f'Pred: {vgg3.idx_to_class[predictions[i].item()]}', fontsize=10)
ax.set_axis_off()
plt.tight_layout()
writer.add_figure('All Test Images & Predictions', fig)
plt.show()
%reload_ext tensorboard
Reusing TensorBoard on port 6007 (pid 18277), started 0:14:12 ago. (Use '!kill 18277' to kill it.)
11169218
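vgg3_aug is used below but never instantiated in this extract; presumably it is a fresh copy of the same three-block architecture defined above:
vgg3_aug = define_model()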
if DEVICE == 'cuda':
vgg3_aug.to('cuda')
class_to_idx = aug_dataloaders['train'].dataset.dataset.class_to_idx
idx_to_class = {idx: cls for cls, idx in class_to_idx.items()}
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg3_aug.parameters())
n_epochs = 10
train_losses = []
val_losses = []
train_accuracy = []
valid_accuracy = []
training_time = []
writer1 = SummaryWriter(f'runs/vgg3_aug/')
step = 0
overall_start_time = time()
for epoch in range(n_epochs):
train_loss = 0.0
val_loss = 0.0
train_acc = 0
valid_acc = 0
train_loss = train_loss/len(aug_dataloaders['train'].sampler)
val_loss = val_loss/len(aug_dataloaders['val'].sampler)
train_acc = train_acc / len(aug_dataloaders['train'].sampler)
valid_acc = valid_acc / len(aug_dataloaders['val'].sampler)
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracy.append(train_acc)
valid_accuracy.append(valid_acc)
training_time.append(train_time)
<matplotlib.legend.Legend at 0x79377fc152a0>
test_acc = 0
step = 0
test_images = []
test_labels = []
predictions = []
output = vgg3_aug(input)
pred = torch.argmax(input=output, dim=1)
correct_labels = pred.eq(target.data.view_as(pred))
acc = torch.mean(correct_labels.type(torch.FloatTensor))
test_acc += acc.item() * input.size(0)
test_images.append(input.cpu())
test_labels.append(target.cpu())
predictions.append(pred.cpu())
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title(f'Predicted: {idx_to_class[pred[0].item()]}')
ax.set_axis_off()
writer1.add_scalar('Testing Accuracy / Iteration', acc, global_step=step)
writer1.add_figure('Test images & Label', fig, global_step=step)
step += 1
for i, ax in enumerate(axes):
if i < test_images.size(0):
img = test_images[i].squeeze().permute(1, 2, 0).numpy()
img = np.clip(img, 0, 1)
ax.imshow(img)
ax.set_title(f'Pred: {idx_to_class[predictions[i].item()]}', fontsize=10)
ax.set_axis_off()
plt.tight_layout()
writer1.add_figure('All Test Images & Predictions', fig)
plt.show()
%reload_ext tensorboard
Reusing TensorBoard on port 6008 (pid 20259), started 0:08:35 ago. (Use '!kill 20259' to kill it.)
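The cell that loads the pretrained VGG16 is not shown; presumably something like the torchvision model zoo call below:
from torchvision import models
vgg16 = models.vgg16(pretrained=True)  # newer torchvision versions: models.vgg16(weights=models.VGG16_Weights.DEFAULT)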
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, 2)
# Freeze only the convolutional feature extractor; the classifier (including the new
# 2-class head) stays trainable, matching the 119,554,050 trainable parameters below.
for params in vgg16.features.parameters():
    params.requires_grad = False
if DEVICE == 'cuda':
vgg16.to('cuda')
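With the feature extractor frozen, the optimizer can optionally be restricted to the parameters that still require gradients (passing all parameters, as done further down, also works because frozen parameters receive no gradient); a sketch:
optimizer = optim.Adam(filter(lambda p: p.requires_grad, vgg16.parameters()))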
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
AdaptiveAvgPool2d-32 [-1, 512, 7, 7] 0
Linear-33 [-1, 4096] 102,764,544
ReLU-34 [-1, 4096] 0
Dropout-35 [-1, 4096] 0
Linear-36 [-1, 4096] 16,781,312
ReLU-37 [-1, 4096] 0
Dropout-38 [-1, 4096] 0
Linear-39 [-1, 2] 8,194
================================================================
Total params: 134,268,738
Trainable params: 119,554,050
Non-trainable params: 14,714,688
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 218.77
Params size (MB): 512.19
Estimated Total Size (MB): 731.54
----------------------------------------------------------------
vgg16.class_to_idx = images['train'].class_to_idx
vgg16.idx_to_class = {
idx: class_
for class_, idx in vgg16.class_to_idx.items()
}
list(vgg16.idx_to_class.items())[:10]
134268738
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16.parameters())
n_epochs = 10
train_losses = []
val_losses = []
train_accuracy = []
valid_accuracy = []
training_time = []
writer = SummaryWriter(f'runs/vgg16/')
step = 0
overall_start_time = time()
for epoch in range(n_epochs):
train_loss = 0.0
val_loss = 0.0
train_acc = 0
valid_acc = 0
step += 1
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracy.append(train_acc)
valid_accuracy.append(valid_acc)
training_time.append(train_time)
<matplotlib.legend.Legend at 0x79379ed6f3a0>
test_acc = 0
step = 0
test_images = []
test_labels = []
predictions = []
output = vgg16(input)
pred = torch.argmax(input=output, dim=1)
correct_labels = pred.eq(target.data.view_as(pred))
acc = torch.mean(correct_labels.type(torch.FloatTensor))
test_acc += acc.item() * input.size(0)
test_images.append(input.cpu())
test_labels.append(target.cpu())
predictions.append(pred.cpu())
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title(f'Predicted: {vgg16.idx_to_class[pred[0].item()]}')
ax.set_axis_off()
step += 1
for i, ax in enumerate(axes):
if i < test_images.size(0):
img = test_images[i].squeeze().permute(1, 2, 0).numpy()
img = np.clip(img, 0, 1)
ax.imshow(img)
ax.set_title(f'Pred: {vgg16.idx_to_class[predictions[i].item()]}', fontsize=10)
ax.set_axis_off()
plt.tight_layout()
writer.add_figure('All Test Images & Predictions', fig)
plt.show()
%reload_ext tensorboard
Reusing TensorBoard on port 6009 (pid 21159), started 0:08:18 ago. (Use '!kill 21159' to kill it.)
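vgg16_ft (the fully fine-tuned variant) is never instantiated in this extract; presumably it is a second pretrained VGG16 left entirely trainable, which matches the 134,268,738 trainable parameters reported in the summary below:
vgg16_ft = models.vgg16(pretrained=True)  # no parameters are frozen for full fine-tuning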
vgg16_ft.classifier[6] = nn.Linear(vgg16_ft.classifier[6].in_features, 2)
if DEVICE == 'cuda':
vgg16_ft.to('cuda')
vgg16_ft.class_to_idx = images['train'].class_to_idx
vgg16_ft.idx_to_class = {
idx: class_
for class_, idx in vgg16_ft.class_to_idx.items()
}
list(vgg16_ft.idx_to_class.items())[:10]
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
AdaptiveAvgPool2d-32 [-1, 512, 7, 7] 0
Linear-33 [-1, 4096] 102,764,544
ReLU-34 [-1, 4096] 0
Dropout-35 [-1, 4096] 0
Linear-36 [-1, 4096] 16,781,312
ReLU-37 [-1, 4096] 0
Dropout-38 [-1, 4096] 0
Linear-39 [-1, 2] 8,194
================================================================
Total params: 134,268,738
Trainable params: 134,268,738
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 218.77
Params size (MB): 512.19
Estimated Total Size (MB): 731.54
----------------------------------------------------------------
134268738
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16_ft.parameters())
n_epochs = 10
train_losses = []
val_losses = []
train_accuracy = []
valid_accuracy = []
training_time = []
writer = SummaryWriter(f'runs/vgg16_ft/')
step = 0
overall_start_time = time()
for epoch in range(n_epochs):
train_loss = 0.0
val_loss = 0.0
train_acc = 0
valid_acc = 0
step += 1
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracy.append(train_acc)
valid_accuracy.append(valid_acc)
training_time.append(train_time)
test_acc = 0
step = 0
test_images = []
test_labels = []
predictions = []
output = vgg16_ft(input)
pred = torch.argmax(input=output, dim=1)
correct_labels = pred.eq(target.data.view_as(pred))
acc = torch.mean(correct_labels.type(torch.FloatTensor))
test_acc += acc.item() * input.size(0)
test_images.append(input.cpu())
test_labels.append(target.cpu())
predictions.append(pred.cpu())
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title(f'Predicted: {vgg16_ft.idx_to_class[pred[0].item()]}')
ax.set_axis_off()
step += 1
for i, ax in enumerate(axes):
if i < test_images.size(0):
img = test_images[i].squeeze().permute(1, 2, 0).numpy()
img = np.clip(img, 0, 1)
ax.imshow(img)
ax.set_title(f'Pred: {vgg16_ft.idx_to_class[predictions[i].item()]}', fontsize=10)
ax.set_axis_off()
plt.tight_layout()
writer.add_figure('All Test Images & Predictions', fig)
plt.show()
%reload_ext tensorboard
Reusing TensorBoard on port 6010 (pid 22365), started 0:08:09 ago. (Use '!kill 22365' to kill it.)
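The MLP itself is never defined in this extract; a sketch consistent with the layer table below (input 3 * 224 * 224 = 150528 features, hidden sizes 900, 512, 256, 128, two outputs). The flattening step is assumed, since the table only lists the Linear and ReLU layers:
MLP = nn.Sequential(
    nn.Flatten(),
    nn.Linear(3 * 224 * 224, 900),
    nn.ReLU(),
    nn.Linear(900, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 2),
)
if DEVICE == 'cuda':
    MLP.to('cuda')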
MLP.class_to_idx = images['train'].class_to_idx
MLP.idx_to_class = {
idx: class_
for class_, idx in images['train'].class_to_idx.items()
}
list(MLP.idx_to_class.items())[:10]
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Linear-1 [-1, 900] 135,476,100
ReLU-2 [-1, 900] 0
Linear-3 [-1, 512] 461,312
ReLU-4 [-1, 512] 0
Linear-5 [-1, 256] 131,328
ReLU-6 [-1, 256] 0
Linear-7 [-1, 128] 32,896
ReLU-8 [-1, 128] 0
Linear-9 [-1, 2] 258
================================================================
Total params: 136,101,894
Trainable params: 136,101,894
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 0.03
Params size (MB): 519.19
Estimated Total Size (MB): 519.79
----------------------------------------------------------------
136101894
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(MLP.parameters())
n_epochs = 10
train_losses = []
val_losses = []
train_accuracy = []
valid_accuracy = []
training_time = []
writer = SummaryWriter(f'runs/MLP/')
step = 0
overall_start_time = time()
for epoch in range(n_epochs):
train_loss = 0.0
val_loss = 0.0
train_acc = 0
valid_acc = 0
step += 1
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracy.append(train_acc)
valid_accuracy.append(valid_acc)
training_time.append(train_time)
<matplotlib.legend.Legend at 0x79379d49b0a0>
test_acc = 0
step = 0
test_images = []
test_labels = []
predictions = []
output = MLP(input)
pred = torch.argmax(input=output, dim=1)
correct_labels = pred.eq(target.data.view_as(pred))
acc = torch.mean(correct_labels.type(torch.FloatTensor))
test_acc += acc.item() * input.size(0)
test_images.append(input.cpu())
test_labels.append(target.cpu())
predictions.append(pred.cpu())
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title(f'Predicted: {MLP.idx_to_class[pred[0].item()]}')
ax.set_axis_off()
writer.add_scalar('Testing Accuracy / Iteration', acc, global_step=step)
writer.add_figure('Test images & Label', fig, global_step=step)
step += 1
for i, ax in enumerate(axes):
if i < test_images.size(0):
img = test_images[i].squeeze().permute(1, 2, 0).numpy()
img = np.clip(img, 0, 1)
ax.imshow(img)
ax.set_title(f'Pred: {MLP.idx_to_class[predictions[i].item()]}', fontsize=10)
ax.set_axis_off()
plt.tight_layout()
writer.add_figure('All Test Images & Predictions', fig)
plt.show()
%reload_ext tensorboard
Results
combined_table = pd.concat([vgg1_acc_loss_time.mean(), vgg3_acc_loss_time.mean(), vgg3_aug_acc_loss_time.mean(), vgg16_acc_loss_time.mean(),
                            vgg16_ft_acc_loss_time.mean(), mlp_acc_loss_time.mean()], axis=1).T  # the source line is truncated; the last two frame names, axis, and transpose are assumed
combined_table.index = ['VGG1','VGG3','VGG3_augmentation','VGG16_FC','VGG16_entire','MLP']
combined_table
Theory
Are the results as expected? Why or why not?
VGG1, being the simplest model, shows lower accuracy than the deeper models, which have more layers and can capture more complex features from the images. VGG3 and VGG3_aug should perform somewhat better than VGG1 because of the extra depth and, for VGG3_aug, the effect of data augmentation. Data augmentation (such as random rotations, flips, or color changes) usually improves generalization by preventing overfitting on the training data, which may explain the better performance.
VGG16: this model typically performs best because its many layers and parameters allow it to capture finer details and more complex features in the images. When the entire VGG16 is fine-tuned, every layer adapts to the specific task of distinguishing pandas from bears; fine-tuning lets the convolutional layers adjust to the features of the new dataset instead of relying only on the pretrained features.
MLP (multilayer perceptron): MLPs typically underperform on image data compared to CNN-based models like VGG. Because they have no convolutional layers, they do not capture spatial hierarchies effectively, which makes them less suitable for image tasks; with proper training they can still reach reasonable performance on simpler datasets.
Data augmentation artificially increases the size and variability of the training dataset by applying random transformations (e.g., rotation, flipping, zoom, cropping). This helps the models generalize better, especially in tasks like distinguishing pandas from bears, where the visual differences may be subtle; for example, a dark bear photographed against snow shows the same light-versus-dark contrast as a panda. Augmentation discourages the model from memorizing specific features of the training set and pushes it to learn generalizable patterns.
Does it matter how many epochs you fine-tune the model? Why or why not?
Yes, the number of epochs is critical in fine-tuning. If you fine-tune for too few epochs, the model does not have enough time to adjust its weights, especially in the convolutional layers, to the new dataset (pandas vs bears), and it performs poorly on the test set. If you fine-tune for too many epochs on a dataset this small (160 training images), the model starts to overfit: training accuracy keeps improving while validation accuracy stalls or drops.
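A common way to pick the number of fine-tuning epochs is to monitor the validation loss and stop once it stops improving; a minimal early-stopping sketch (the patience value and checkpoint filename are assumptions, not from the original notebook):
best_val_loss = float('inf')
patience, epochs_without_improvement = 3, 0
for epoch in range(n_epochs):
    # ... training and validation passes as above, producing val_loss ...
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
        torch.save(vgg16_ft.state_dict(), 'best_vgg16_ft.pt')  # keep the best checkpoint
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print(f'Stopping early after epoch {epoch + 1}')
            break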
Are there any particular images that the model is confused about? Why or why not?
Intra-class variability: both pandas and bears appear in a variety of poses, colors, and sizes, so the model can be confused when an animal has an unusual appearance; for instance, a bear with light patches on its chest, or a panda cub curled up, can fall between the two classes.
Inter-class similarity: pandas and bears share overall body shape, fur texture, and typical backgrounds. A bear partly hidden by vegetation, or photographed in lighting that washes out its coloring, may resemble a panda and be assigned to the wrong class.
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
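The notebook is truncated at this point; presumably the cell goes on to load a single image and run one of the trained models on it. A sketch under that assumption (the file path and choice of model are hypothetical, and in practice the same Normalize used for training should be added to the transform):
img = Image.open('/content/drive/MyDrive/dataset/test/panda/sample.jpg')  # hypothetical path
x = transform(img).unsqueeze(0)  # add a batch dimension
if DEVICE == 'cuda':
    x = x.cuda()
vgg16_ft.eval()
with torch.no_grad():
    pred = torch.argmax(vgg16_ft(x), dim=1)
print('Predicted:', vgg16_ft.idx_to_class[pred.item()])
plt.imshow(img)
plt.axis('off')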