what.models.detection.frcnn.faster_rcnn

import os
import time
from collections import namedtuple

import torch
from torch import nn
from torch.nn import functional as F

from what.models.detection.frcnn.meter import ConfusionMeter, AverageValueMeter

from what.models.detection.frcnn.model.utils.creator_tool import AnchorTargetCreator, ProposalTargetCreator
from what.models.detection.frcnn.model.faster_rcnn_vgg16 import FasterRCNNVGG16
from what.models.detection.utils.array_utils import to_numpy, to_scalar, to_tensor
from what.models.detection.frcnn.utils.config import opt
from what.models.detection.frcnn.utils.eval_tool import eval_detection_voc

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss',
                        'rpn_cls_loss',
                        'roi_loc_loss',
                        'roi_cls_loss',
                        'total_loss'
                        ])

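# _smooth_l1_loss below is the smooth L1 (Huber-style) loss from Fast R-CNN,
# with the switch point controlled by sigma. With d = in_weight * (x - t),
# elementwise:
#
#     loss(d) = 0.5 * sigma**2 * d**2      if |d| < 1 / sigma**2
#             = |d| - 0.5 / sigma**2       otherwise
#
# The two branches meet at |d| = 1 / sigma**2, and the result is summed (not
# averaged); _fast_rcnn_loc_loss then normalizes by the number of labeled
# anchors/RoIs.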
def _smooth_l1_loss(x, t, in_weight, sigma):
    sigma2 = sigma ** 2
    diff = in_weight * (x - t)
    abs_diff = diff.abs()
    flag = (abs_diff.data < (1. / sigma2)).float()
    y = (flag * (sigma2 / 2.) * (diff ** 2) +
         (1 - flag) * (abs_diff - 0.5 / sigma2))
    return y.sum()

def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    # Create the weight mask on the same device as gt_loc so GPU training works.
    in_weight = torch.zeros(gt_loc.shape, device=gt_loc.device)
    # Localization loss is calculated only for positive RoIs.
    # NOTE: unlike the original implementation, we don't need inside_weight
    # and outside_weight; they can be computed from gt_label.
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight)] = 1
    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, in_weight.detach(), sigma)
    # Normalize by the total number of negative and positive RoIs.
    loc_loss /= ((gt_label >= 0).sum().float())  # ignore gt_label == -1 for rpn_loss
    return loc_loss

class FasterRCNN(nn.Module):
    """A wrapper around Faster R-CNN for convenient training. Returns losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for the RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        device (torch.device): The device on which the underlying
            :class:`FasterRCNNVGG16` model is built and trained.
    """

    def __init__(self, device=torch.device('cpu')):
        super(FasterRCNN, self).__init__()

        self.device = device

        self.faster_rcnn = FasterRCNNVGG16(device=device)
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators generate gt_bbox, gt_label, etc. as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # Indicators of training status
        self.rpn_cm = ConfusionMeter(2)     # RPN: object vs. background
        self.roi_cm = ConfusionMeter(21)    # head: 20 VOC classes + background
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average losses

    def predict(self, img):
        return self.faster_rcnn.predict(img)

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are the notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the values
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward them through the head.
        # It is fine to break the computation graph of rois:
        # consider them constant input.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            to_numpy(bbox),
            to_numpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: all zeros because only batch size 1 is supported for now
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            to_numpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = to_tensor(gt_rpn_label, self.device).long()
        gt_rpn_loc = to_tensor(gt_rpn_loc, self.device)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: the default value of ignore_index is -100, so pass -1 explicitly
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.to(self.device), ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
        self.rpn_cm.add(to_tensor(_rpn_score, torch.device("cpu")), _gt_rpn_label.data.long())

        # ------------------ RoI losses (Fast R-CNN loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().to(self.device),
                              to_tensor(gt_roi_label, self.device).long()]
        gt_roi_label = to_tensor(gt_roi_label, self.device).long()
        gt_roi_loc = to_tensor(gt_roi_loc, self.device)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)

        self.roi_cm.add(to_tensor(roi_score, torch.device("cpu")), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def step(self, imgs, bboxes, labels, scale):
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def eval(self, val_loader, test_num=10000):
        # NOTE: this overrides nn.Module.eval(); it runs VOC evaluation
        # rather than switching the module to evaluation mode.
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()

        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in enumerate(val_loader):
            sizes = [sizes[0][0], sizes[1][0]]
            pred_bboxes_, pred_labels_, pred_scores_ = self.faster_rcnn.predict(imgs, [sizes])
            gt_bboxes += list(gt_bboxes_.numpy())
            gt_labels += list(gt_labels_.numpy())
            gt_difficults += list(gt_difficults_.numpy())
            pred_bboxes += pred_bboxes_
            pred_labels += pred_labels_
            pred_scores += pred_scores_
            if ii == test_num:
                break

        result = eval_detection_voc(
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults,
            use_07_metric=True)
        return result


    def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu')):
        state_dict = torch.load(path, map_location=map_location)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally with the optimizer state and other info.
        Returns the path where the model file is stored.

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model. If it is None,
                save_path is generated from a time string and the kwargs.

        Returns:
            save_path (str): the path where the model is saved.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        # save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # Guard against a bare filename, for which dirname() returns ''.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        # self.vis.save([self.vis.env])
        return save_path

    def update_meters(self, losses):
        loss_d = {k: to_scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        return {k: v.value()[0] for k, v in self.meters.items()}
class LossTuple(builtins.tuple):

LossTuple(rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss)

A namedtuple holding the five training losses. Fields, in order: rpn_loc_loss (0), rpn_cls_loss (1), roi_loc_loss (2), roi_cls_loss (3), total_loss (4).
class FasterRCNN(torch.nn.modules.module.Module):

A wrapper around Faster R-CNN for convenient training. Returns losses.

The losses include:

  • rpn_loc_loss: The localization loss for the Region Proposal Network (RPN).
  • rpn_cls_loss: The classification loss for the RPN.
  • roi_loc_loss: The localization loss for the head module.
  • roi_cls_loss: The classification loss for the head module.
  • total_loss: The sum of the 4 losses above.

Args: device (torch.device): The device on which the underlying FasterRCNNVGG16 model is built and trained.
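
A minimal training sketch (not from the package's documentation: the shapes follow the forward() docstring, the (ymin, xmin, ymax, xmax) box order is assumed from the upstream simple-faster-rcnn-pytorch code, and the dummy tensors stand in for a real preprocessing pipeline):

import torch
from what.models.detection.frcnn.faster_rcnn import FasterRCNN

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
trainer = FasterRCNN(device=device)

# One dummy training sample: batch size N = 1, R = 2 ground-truth boxes.
imgs = torch.randn(1, 3, 600, 800).to(device)          # (N, C, H, W), preprocessed
bboxes = torch.tensor([[[ 10.,  20., 150., 200.],
                        [300., 400., 500., 600.]]])    # (N, R, 4)
labels = torch.tensor([[5, 11]])                       # (N, R), values in [0, L-1]
scale = 1.0                                            # preprocessing scale factor

# step() = zero_grad + forward + backward + optimizer step + meter update.
losses = trainer.step(imgs, bboxes, labels, scale)
print(losses.total_loss.item())
print(trainer.get_meter_data())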

FasterRCNN(device=device(type='cpu'))

Initializes internal Module state, shared by both nn.Module and ScriptModule.

def predict(self, img):
def forward(self, imgs, bboxes, labels, scale):

Forward Faster R-CNN and calculate losses.

Here are the notations used.

  • \( N \) is the batch size.
  • \( R \) is the number of bounding boxes per image.

Currently, only \( N=1 \) is supported.

Args:
    imgs (~torch.autograd.Variable): A variable with a batch of images.
    bboxes (~torch.autograd.Variable): A batch of bounding boxes. Its shape is \( (N, R, 4) \).
    labels (~torch.autograd.Variable): A batch of labels. Its shape is \( (N, R) \). The background is excluded from the definition, which means that the range of the values is \( [0, L - 1] \); \( L \) is the number of foreground classes.
    scale (float): Amount of scaling applied to the raw image during preprocessing.

Returns: namedtuple of 5 losses
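
For reference, a short sketch (assumed usage, with the same dummy inputs as the class-level example above) of consuming the returned LossTuple:

# forward() returns a LossTuple of scalar tensors.
losses = trainer.forward(imgs, bboxes, labels, scale)
for name, value in losses._asdict().items():
    print(f'{name}: {value.item():.4f}')

# total_loss is the sum of the other four; step() backpropagates it.
assert torch.isclose(losses.total_loss,
                     losses.rpn_loc_loss + losses.rpn_cls_loss
                     + losses.roi_loc_loss + losses.roi_cls_loss)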

def step(self, imgs, bboxes, labels, scale):
def eval(self, val_loader, test_num=10000):

Runs PASCAL VOC evaluation over val_loader (up to test_num batches) and returns the metrics computed by eval_detection_voc with the VOC2007 11-point metric.

Note: this overrides nn.Module.eval(), which normally just switches the module to evaluation mode; use self.train(False) if you need that behavior.

Returns: dict of VOC detection metrics
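
A hypothetical evaluation sketch: from the source above, each batch from val_loader must unpack as (imgs, sizes, gt_bboxes, gt_labels, gt_difficults), with sizes holding the original (H, W). voc_test_dataset is a placeholder for such a dataset, and the 'map' key assumes the chainercv-style return dict of eval_detection_voc:

from torch.utils.data import DataLoader

# voc_test_dataset is hypothetical: each item must yield
# (img, (H, W), gt_bboxes, gt_labels, gt_difficults) for one image.
val_loader = DataLoader(voc_test_dataset, batch_size=1, shuffle=False)

result = trainer.eval(val_loader, test_num=1000)   # stop after ~1000 images
print(result['map'])                               # mean average precision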

def load(self, path, load_optimizer=True, parse_opt=False, map_location=device(type='cpu')):
def save(self, save_optimizer=False, save_path=None, **kwargs):

Serialize the model, optionally with the optimizer state and other info. Returns the path where the model file is stored.

Args: save_optimizer (bool): whether to save optimizer.state_dict(). save_path (string): where to save the model; if it is None, save_path is generated from a time string and the kwargs.

Returns: save_path (str): the path where the model is saved.
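
A save/load round-trip sketch (illustrative only; the printed path follows the fallback naming in the source, and extra kwargs such as epoch are stored under 'other_info' and appended to the generated filename):

# Save weights plus optimizer state.
path = trainer.save(save_optimizer=True, epoch=12)
print(path)   # e.g. checkpoints/fasterrcnn_<MMDDHHMM>_12

# Restore into a fresh wrapper; load() returns self, so calls chain.
trainer2 = FasterRCNN().load(path, load_optimizer=True)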

def update_meters(self, losses):
def reset_meters(self):
def get_meter_data(self):