From 723812cfeddfded203dc4c2be749b64a960fe854 Mon Sep 17 00:00:00 2001
From: Geoff French
Date: Tue, 21 Jul 2020 09:59:03 +0100
Subject: [PATCH 1/2] Softmax and log-softmax no longer applied in models.
 Evaluation and training programs now use `nn.CrossEntropyLoss` rather than
 `nn.NLLLoss`.

---
 eval.py                     |   5 +-
 eval_multipro.py            |   5 +-
 mit_semseg/models/models.py | 101 ++++++++++++------------------------
 test.py                     |   5 +-
 train.py                    |   2 +-
 5 files changed, 39 insertions(+), 79 deletions(-)

diff --git a/eval.py b/eval.py
index b4a60ad7..68311a1a 100644
--- a/eval.py
+++ b/eval.py
@@ -116,10 +116,9 @@ def main(cfg, gpu):
         arch=cfg.MODEL.arch_decoder.lower(),
         fc_dim=cfg.MODEL.fc_dim,
         num_class=cfg.DATASET.num_class,
-        weights=cfg.MODEL.weights_decoder,
-        use_softmax=True)
+        weights=cfg.MODEL.weights_decoder)
 
-    crit = nn.NLLLoss(ignore_index=-1)
+    crit = nn.CrossEntropyLoss(ignore_index=-1)
 
     segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
 
diff --git a/eval_multipro.py b/eval_multipro.py
index db328cd3..e133115b 100644
--- a/eval_multipro.py
+++ b/eval_multipro.py
@@ -106,10 +106,9 @@ def worker(cfg, gpu_id, start_idx, end_idx, result_queue):
         arch=cfg.MODEL.arch_decoder.lower(),
         fc_dim=cfg.MODEL.fc_dim,
         num_class=cfg.DATASET.num_class,
-        weights=cfg.MODEL.weights_decoder,
-        use_softmax=True)
+        weights=cfg.MODEL.weights_decoder)
 
-    crit = nn.NLLLoss(ignore_index=-1)
+    crit = nn.CrossEntropyLoss(ignore_index=-1)
 
     segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
 
diff --git a/mit_semseg/models/models.py b/mit_semseg/models/models.py
index 793d2bd7..cf463375 100644
--- a/mit_semseg/models/models.py
+++ b/mit_semseg/models/models.py
@@ -29,22 +29,19 @@ def __init__(self, net_enc, net_dec, crit, deep_sup_scale=None):
     def forward(self, feed_dict, *, segSize=None):
         # training
         if segSize is None:
-            if self.deep_sup_scale is not None: # use deep supervision technique
-                (pred, pred_deepsup) = self.decoder(self.encoder(feed_dict['img_data'], return_feature_maps=True))
-            else:
-                pred = self.decoder(self.encoder(feed_dict['img_data'], return_feature_maps=True))
+            pred_dict = self.decoder(self.encoder(feed_dict['img_data'], return_feature_maps=True))
 
-            loss = self.crit(pred, feed_dict['seg_label'])
+            loss = self.crit(pred_dict['logits'], feed_dict['seg_label'])
             if self.deep_sup_scale is not None:
-                loss_deepsup = self.crit(pred_deepsup, feed_dict['seg_label'])
+                loss_deepsup = self.crit(pred_dict['deepsup_logits'], feed_dict['seg_label'])
                 loss = loss + loss_deepsup * self.deep_sup_scale
 
-            acc = self.pixel_acc(pred, feed_dict['seg_label'])
+            acc = self.pixel_acc(pred_dict['logits'], feed_dict['seg_label'])
             return loss, acc
         # inference
         else:
-            pred = self.decoder(self.encoder(feed_dict['img_data'], return_feature_maps=True), segSize=segSize)
-            return pred
+            pred_dict = self.decoder(self.encoder(feed_dict['img_data'], return_feature_maps=True), segSize=segSize)
+            return pred_dict['logits']
 
 
 class ModelBuilder:
@@ -112,39 +109,33 @@ def build_encoder(arch='resnet50dilated', fc_dim=512, weights=''):
     @staticmethod
     def build_decoder(arch='ppm_deepsup',
                       fc_dim=512, num_class=150,
-                      weights='', use_softmax=False):
+                      weights=''):
         arch = arch.lower()
         if arch == 'c1_deepsup':
             net_decoder = C1DeepSup(
                 num_class=num_class,
-                fc_dim=fc_dim,
-                use_softmax=use_softmax)
+                fc_dim=fc_dim)
         elif arch == 'c1':
             net_decoder = C1(
                 num_class=num_class,
-                fc_dim=fc_dim,
-                use_softmax=use_softmax)
+                fc_dim=fc_dim)
         elif arch == 'ppm':
             net_decoder = PPM(
                 num_class=num_class,
-                fc_dim=fc_dim,
-                use_softmax=use_softmax)
+                fc_dim=fc_dim)
         elif arch == 'ppm_deepsup':
             net_decoder = PPMDeepsup(
                 num_class=num_class,
-                fc_dim=fc_dim,
-                use_softmax=use_softmax)
+                fc_dim=fc_dim)
         elif arch == 'upernet_lite':
             net_decoder = UPerNet(
                 num_class=num_class,
                 fc_dim=fc_dim,
-                use_softmax=use_softmax,
                 fpn_dim=256)
         elif arch == 'upernet':
             net_decoder = UPerNet(
                 num_class=num_class,
                 fc_dim=fc_dim,
-                use_softmax=use_softmax,
                 fpn_dim=512)
         else:
             raise Exception('Architecture undefined!')
@@ -325,10 +316,8 @@ def forward(self, x, return_feature_maps=False):
 
 # last conv, deep supervision
 class C1DeepSup(nn.Module):
-    def __init__(self, num_class=150, fc_dim=2048, use_softmax=False):
+    def __init__(self, num_class=150, fc_dim=2048):
         super(C1DeepSup, self).__init__()
-        self.use_softmax = use_softmax
-
         self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1)
         self.cbr_deepsup = conv3x3_bn_relu(fc_dim // 2, fc_dim // 4, 1)
 
@@ -342,29 +331,22 @@ def forward(self, conv_out, segSize=None):
         x = self.cbr(conv5)
         x = self.conv_last(x)
 
-        if self.use_softmax: # is True during inference
+        if segSize is not None:
             x = nn.functional.interpolate(
                 x, size=segSize, mode='bilinear', align_corners=False)
-            x = nn.functional.softmax(x, dim=1)
-            return x
 
         # deep sup
         conv4 = conv_out[-2]
-        _ = self.cbr_deepsup(conv4)
-        _ = self.conv_last_deepsup(_)
+        deepsup = self.cbr_deepsup(conv4)
+        deepsup = self.conv_last_deepsup(deepsup)
 
-        x = nn.functional.log_softmax(x, dim=1)
-        _ = nn.functional.log_softmax(_, dim=1)
-
-        return (x, _)
+        return dict(logits=x, deepsup_logits=deepsup)
 
 
 # last conv
 class C1(nn.Module):
-    def __init__(self, num_class=150, fc_dim=2048, use_softmax=False):
+    def __init__(self, num_class=150, fc_dim=2048):
         super(C1, self).__init__()
-        self.use_softmax = use_softmax
-
         self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1)
 
         # last conv
@@ -375,23 +357,18 @@ def forward(self, conv_out, segSize=None):
         x = self.cbr(conv5)
         x = self.conv_last(x)
 
-        if self.use_softmax: # is True during inference
+        if segSize is not None:
             x = nn.functional.interpolate(
                 x, size=segSize, mode='bilinear', align_corners=False)
-            x = nn.functional.softmax(x, dim=1)
-        else:
-            x = nn.functional.log_softmax(x, dim=1)
 
-        return x
+        return dict(logits=x)
 
 
 # pyramid pooling
 class PPM(nn.Module):
     def __init__(self, num_class=150, fc_dim=4096,
-                 use_softmax=False, pool_scales=(1, 2, 3, 6)):
+                 pool_scales=(1, 2, 3, 6)):
         super(PPM, self).__init__()
-        self.use_softmax = use_softmax
-
         self.ppm = []
         for scale in pool_scales:
             self.ppm.append(nn.Sequential(
@@ -425,22 +402,18 @@ def forward(self, conv_out, segSize=None):
 
         x = self.conv_last(ppm_out)
 
-        if self.use_softmax: # is True during inference
+        if segSize is not None:
             x = nn.functional.interpolate(
                 x, size=segSize, mode='bilinear', align_corners=False)
-            x = nn.functional.softmax(x, dim=1)
-        else:
-            x = nn.functional.log_softmax(x, dim=1)
-        return x
+
+        return dict(logits=x)
 
 
 # pyramid pooling, deep supervision
 class PPMDeepsup(nn.Module):
     def __init__(self, num_class=150, fc_dim=4096,
-                 use_softmax=False, pool_scales=(1, 2, 3, 6)):
+                 pool_scales=(1, 2, 3, 6)):
         super(PPMDeepsup, self).__init__()
-        self.use_softmax = use_softmax
-
         self.ppm = []
         for scale in pool_scales:
             self.ppm.append(nn.Sequential(
@@ -477,31 +450,25 @@ def forward(self, conv_out, segSize=None):
 
         x = self.conv_last(ppm_out)
 
-        if self.use_softmax: # is True during inference
+        if segSize is not None:
             x = nn.functional.interpolate(
                 x, size=segSize, mode='bilinear', align_corners=False)
-            x = nn.functional.softmax(x, dim=1)
-            return x
 
         # deep sup
         conv4 = conv_out[-2]
-        _ = self.cbr_deepsup(conv4)
-        _ = self.dropout_deepsup(_)
-        _ = self.conv_last_deepsup(_)
-
-        x = nn.functional.log_softmax(x, dim=1)
-        _ = nn.functional.log_softmax(_, dim=1)
+        deepsup = self.cbr_deepsup(conv4)
+        deepsup = self.dropout_deepsup(deepsup)
+        deepsup = self.conv_last_deepsup(deepsup)
 
-        return (x, _)
+        return dict(logits=x, deepsup_logits=deepsup)
 
 
 # upernet
 class UPerNet(nn.Module):
     def __init__(self, num_class=150, fc_dim=4096,
-                 use_softmax=False, pool_scales=(1, 2, 3, 6),
+                 pool_scales=(1, 2, 3, 6),
                  fpn_inplanes=(256, 512, 1024, 2048), fpn_dim=256):
         super(UPerNet, self).__init__()
-        self.use_softmax = use_softmax
 
         # PPM Module
         self.ppm_pooling = []
@@ -575,12 +542,8 @@ def forward(self, conv_out, segSize=None):
         fusion_out = torch.cat(fusion_list, 1)
         x = self.conv_last(fusion_out)
 
-        if self.use_softmax: # is True during inference
+        if segSize is not None:
             x = nn.functional.interpolate(
                 x, size=segSize, mode='bilinear', align_corners=False)
-            x = nn.functional.softmax(x, dim=1)
-            return x
-
-        x = nn.functional.log_softmax(x, dim=1)
-        return x
+        return dict(logits=x)
 
diff --git a/test.py b/test.py
index a0a2eec2..494c4e09 100644
--- a/test.py
+++ b/test.py
@@ -103,10 +103,9 @@ def main(cfg, gpu):
         arch=cfg.MODEL.arch_decoder,
         fc_dim=cfg.MODEL.fc_dim,
         num_class=cfg.DATASET.num_class,
-        weights=cfg.MODEL.weights_decoder,
-        use_softmax=True)
+        weights=cfg.MODEL.weights_decoder)
 
-    crit = nn.NLLLoss(ignore_index=-1)
+    crit = nn.CrossEntropyLoss(ignore_index=-1)
 
     segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
 
diff --git a/train.py b/train.py
index cfe3b3ee..a602a7fa 100644
--- a/train.py
+++ b/train.py
@@ -151,7 +151,7 @@ def main(cfg, gpus):
         num_class=cfg.DATASET.num_class,
         weights=cfg.MODEL.weights_decoder)
 
-    crit = nn.NLLLoss(ignore_index=-1)
+    crit = nn.CrossEntropyLoss(ignore_index=-1)
 
     if cfg.MODEL.arch_decoder.endswith('deepsup'):
         segmentation_module = SegmentationModule(

From f491232f360af7fa05960229c4b4d4b0de0ef261 Mon Sep 17 00:00:00 2001
From: Geoff French
Date: Tue, 21 Jul 2020 12:54:45 +0100
Subject: [PATCH 2/2] Apply interpolation to deep supervision logits.

---
 mit_semseg/models/models.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/mit_semseg/models/models.py b/mit_semseg/models/models.py
index cf463375..77e3d4a0 100644
--- a/mit_semseg/models/models.py
+++ b/mit_semseg/models/models.py
@@ -340,6 +340,10 @@ def forward(self, conv_out, segSize=None):
         deepsup = self.cbr_deepsup(conv4)
         deepsup = self.conv_last_deepsup(deepsup)
 
+        if segSize is not None:
+            deepsup = nn.functional.interpolate(
+                deepsup, size=segSize, mode='bilinear', align_corners=False)
+
         return dict(logits=x, deepsup_logits=deepsup)
 
 
@@ -460,6 +464,10 @@ def forward(self, conv_out, segSize=None):
         deepsup = self.cbr_deepsup(conv4)
         deepsup = self.dropout_deepsup(deepsup)
         deepsup = self.conv_last_deepsup(deepsup)
 
+        if segSize is not None:
+            deepsup = nn.functional.interpolate(
+                deepsup, size=segSize, mode='bilinear', align_corners=False)
+
         return dict(logits=x, deepsup_logits=deepsup)
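
Note on the loss change in PATCH 1/2: `nn.CrossEntropyLoss` computes `log_softmax` followed by `nn.NLLLoss`, so moving the normalisation out of the decoders and into the criterion leaves the training objective unchanged. A minimal sanity check, not part of the patch (shapes and class count are illustrative):

```python
import torch
import torch.nn as nn

# Dummy data shaped like the segmentation outputs:
# (batch, num_class, H, W) logits, (batch, H, W) labels, -1 marks ignored pixels.
logits = torch.randn(2, 150, 8, 8)
labels = torch.randint(-1, 150, (2, 8, 8))

# Old pipeline: decoders applied log_softmax, training used NLLLoss.
old_loss = nn.NLLLoss(ignore_index=-1)(
    nn.functional.log_softmax(logits, dim=1), labels)

# New pipeline: decoders return raw logits, training uses CrossEntropyLoss.
new_loss = nn.CrossEntropyLoss(ignore_index=-1)(logits, labels)

assert torch.allclose(old_loss, new_loss)
```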
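
At inference, `SegmentationModule` now returns raw logits rather than softmax probabilities. Code that only takes a per-pixel argmax over the scores (as the evaluation scripts do) is unaffected, since softmax is strictly increasing along the class dimension; callers that need actual probabilities must now apply softmax themselves. A quick check of this invariance, again illustrative:

```python
import torch

logits = torch.randn(1, 150, 4, 4)      # (batch, num_class, H, W)
probs = torch.softmax(logits, dim=1)    # what the models returned before this patch

# Per-pixel predicted classes are identical either way.
assert torch.equal(logits.argmax(dim=1), probs.argmax(dim=1))
```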
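
PATCH 2/2 makes the two heads shape-consistent at inference: when `segSize` is given, `deepsup_logits` is now upsampled alongside `logits`. A rough shape check against the patched `C1DeepSup` decoder (assumes the `mit_semseg` package with both patches applied; batch size and feature-map sizes are illustrative):

```python
import torch
from mit_semseg.models.models import C1DeepSup

decoder = C1DeepSup(num_class=150, fc_dim=2048)
conv4 = torch.randn(1, 1024, 32, 32)    # fc_dim // 2 channels, feeds the deep-sup head
conv5 = torch.randn(1, 2048, 32, 32)    # fc_dim channels, feeds the main head
out = decoder([conv4, conv5], segSize=(256, 256))

# Both heads are interpolated to segSize at inference after this patch.
assert out['logits'].shape == (1, 150, 256, 256)
assert out['deepsup_logits'].shape == (1, 150, 256, 256)
```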