what.models.detection.frcnn.faster_rcnn
```python
import os
import time
from collections import namedtuple

import torch
from torch import nn
from torch.nn import functional as F

from what.models.detection.frcnn.meter import ConfusionMeter, AverageValueMeter

from what.models.detection.frcnn.model.utils.creator_tool import AnchorTargetCreator, ProposalTargetCreator
from what.models.detection.frcnn.model.faster_rcnn_vgg16 import FasterRCNNVGG16
from what.models.detection.utils.array_utils import to_numpy, to_scalar, to_tensor
from what.models.detection.frcnn.utils.config import opt
from what.models.detection.frcnn.utils.eval_tool import eval_detection_voc

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss',
                        'rpn_cls_loss',
                        'roi_loc_loss',
                        'roi_cls_loss',
                        'total_loss'])


def _smooth_l1_loss(x, t, in_weight, sigma):
    sigma2 = sigma ** 2
    diff = in_weight * (x - t)
    abs_diff = diff.abs()
    flag = (abs_diff.data < (1. / sigma2)).float()
    y = (flag * (sigma2 / 2.) * (diff ** 2) +
         (1 - flag) * (abs_diff - 0.5 / sigma2))
    return y.sum()


def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    # Create the mask on the same device as gt_loc, so that boolean
    # indexing with gt_label works when training on GPU as well.
    in_weight = torch.zeros(gt_loc.shape, device=gt_loc.device)
    # The localization loss is calculated only for positive RoIs.
    # NOTE: unlike the original implementation, we don't need inside_weight
    # and outside_weight; they can be derived from gt_label.
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight)] = 1
    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, in_weight.detach(), sigma)
    # Normalize by the total number of negative and positive RoIs,
    # ignoring gt_label == -1 (used for the RPN loss).
    loc_loss /= ((gt_label >= 0).sum().float())
    return loc_loss


class FasterRCNN(nn.Module):
    """Wrapper for convenient training. Returns losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for the RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the four losses above.

    Args:
        device (torch.device): The device on which the wrapped
            Faster R-CNN model is created and trained.
    """

    def __init__(self, device=torch.device('cpu')):
        super(FasterRCNN, self).__init__()

        self.device = device

        self.faster_rcnn = FasterRCNNVGG16(device=device)
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators produce gt_bbox, gt_label, etc. as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # Indicators of training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average losses

    def predict(self, img):
        return self.faster_rcnn.predict(img)

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are the notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since the batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois;
        # consider them constant input.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            to_numpy(bbox),
            to_numpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: all indices are zero because only batch size 1 is supported.
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            to_numpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = to_tensor(gt_rpn_label, self.device).long()
        gt_rpn_loc = to_tensor(gt_rpn_loc, self.device)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: the default value of ignore_index is -100, so -1 must be
        # passed explicitly to ignore unassigned anchor labels.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.to(self.device), ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
        self.rpn_cm.add(to_tensor(_rpn_score, torch.device("cpu")), _gt_rpn_label.data.long())

        # ------------------ RoI losses (Fast R-CNN loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick the predicted offsets of the ground-truth class for each sample.
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().to(self.device),
                              to_tensor(gt_roi_label, self.device).long()]
        gt_roi_label = to_tensor(gt_roi_label, self.device).long()
        gt_roi_loc = to_tensor(gt_roi_loc, self.device)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)

        self.roi_cm.add(to_tensor(roi_score, torch.device("cpu")), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def step(self, imgs, bboxes, labels, scale):
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    # NOTE: this overrides nn.Module.eval(); it runs VOC-style evaluation
    # rather than switching the module to evaluation mode.
    def eval(self, val_loader, test_num=10000):
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()

        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in enumerate(val_loader):
            sizes = [sizes[0][0], sizes[1][0]]
            pred_bboxes_, pred_labels_, pred_scores_ = self.faster_rcnn.predict(imgs, [sizes])
            gt_bboxes += list(gt_bboxes_.numpy())
            gt_labels += list(gt_labels_.numpy())
            gt_difficults += list(gt_difficults_.numpy())
            pred_bboxes += pred_bboxes_
            pred_labels += pred_labels_
            pred_scores += pred_scores_
            if ii == test_num:
                break

        result = eval_detection_voc(
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults,
            use_07_metric=True)
        return result

    def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu')):
        state_dict = torch.load(path, map_location=map_location)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy format, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally including the optimizer and other
        info, and return the path where the model file is stored.

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model; if it's None,
                save_path is generated from a time string and the info
                in kwargs.

        Returns:
            save_path (str): the path the model was saved to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        # save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        # self.vis.save([self.vis.env])
        return save_path

    def update_meters(self, losses):
        loss_d = {k: to_scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        return {k: v.value()[0] for k, v in self.meters.items()}
```
LossTuple(rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss)
Create new instance of LossTuple(rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss)
Inherited Members
- builtins.tuple
- index
- count
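Since `LossTuple` is a plain namedtuple, the losses returned by `forward()`/`step()` can be read by field name or converted to a dict, as `update_meters` does. A minimal sketch with made-up loss values:

```python
from collections import namedtuple

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss', 'rpn_cls_loss',
                        'roi_loc_loss', 'roi_cls_loss', 'total_loss'])

# Made-up loss values, for illustration only.
losses = LossTuple(0.12, 0.34, 0.21, 0.45, 1.12)
print(losses.total_loss)  # named field access -> 1.12
print(losses._asdict())   # dict view, as used by update_meters()
```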
class FasterRCNN(nn.Module):
Wrapper for convenient training. Returns the losses.

The losses include:

- rpn_loc_loss: The localization loss for the Region Proposal Network (RPN).
- rpn_cls_loss: The classification loss for the RPN.
- roi_loc_loss: The localization loss for the head module.
- roi_cls_loss: The classification loss for the head module.
- total_loss: The sum of the four losses above.

Args: device (torch.device): The device on which the wrapped Faster R-CNN model is created and trained.
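A minimal training-loop sketch under the constraints above; the `dataloader` yielding `(imgs, bboxes, labels, scale)` tuples is hypothetical, and the batch size must be 1:

```python
import torch

trainer = FasterRCNN(device=torch.device('cpu'))
trainer.reset_meters()
for imgs, bboxes, labels, scale in dataloader:  # hypothetical data source
    # step() runs forward(), backpropagates total_loss, and updates the meters.
    losses = trainer.step(imgs, bboxes, labels, scale)
print(trainer.get_meter_data())  # running averages of the five losses
```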
def __init__(self, device=torch.device('cpu')):
Initializes internal Module state, shared by both nn.Module and ScriptModule.
def forward(self, imgs, bboxes, labels, scale):
Forward Faster R-CNN and calculate losses.
Here are the notations used.
- \( N \) is the batch size.
- \( R \) is the number of bounding boxes per image.
Currently, only \( N=1 \) is supported.
Args:
- imgs (~torch.autograd.Variable): A variable with a batch of images.
- bboxes (~torch.autograd.Variable): A batch of bounding boxes. Its shape is \( (N, R, 4) \).
- labels (~torch.autograd.Variable): A batch of labels. Its shape is \( (N, R) \). The background is excluded from the definition, which means that the values range over \( [0, L - 1] \), where \( L \) is the number of foreground classes.
- scale (float): Amount of scaling applied to the raw image during preprocessing.
Returns: namedtuple of 5 losses
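A shape sketch of a single `forward()` call following the docstring above; the tensor contents are placeholders, not meaningful annotations:

```python
import torch

trainer = FasterRCNN()
imgs = torch.randn(1, 3, 600, 800)                 # (N, C, H, W); N must be 1
bboxes = torch.tensor([[[50., 60., 200., 300.]]])  # (N, R, 4)
labels = torch.tensor([[0]])                       # (N, R), values in [0, L - 1]
losses = trainer(imgs, bboxes, labels, scale=1.0)
print(losses.total_loss)
```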
def eval(self, val_loader, test_num=10000):
Runs VOC-style detection evaluation over val_loader: predictions are collected from self.faster_rcnn.predict and scored with eval_detection_voc using the VOC2007 11-point metric (use_07_metric=True). At most test_num batches are processed.

Note that this method overrides torch.nn.Module.eval(), which normally switches the module to evaluation mode (affecting e.g. Dropout and BatchNorm); calling eval() on this wrapper runs evaluation instead.

Returns: dict: the result of eval_detection_voc.
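A minimal evaluation sketch; `val_loader` is hypothetical and must yield `(imgs, sizes, gt_bboxes, gt_labels, gt_difficults)` batches as in the source, and the `'map'` key assumes a ChainerCV-style `eval_detection_voc` result dict:

```python
result = trainer.eval(val_loader, test_num=100)  # val_loader is hypothetical
print(result['map'])  # mean AP, assuming a ChainerCV-style result dict
```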
def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu')):
def save(self, save_optimizer=False, save_path=None, **kwargs):
Serialize the model, optionally including the optimizer and other info, and return the path where the model file is stored.

Args:
- save_optimizer (bool): whether to save optimizer.state_dict().
- save_path (string): where to save the model; if it's None, save_path is generated from a time string and the info in kwargs.

Returns: save_path (str): the path the model was saved to.
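A save/load round trip under the defaults above; any extra keyword is stored in `other_info` and its value appended to the generated filename:

```python
# Saves to 'checkpoints/fasterrcnn_<MMDDHHMM>_demo' when save_path is None.
path = trainer.save(save_optimizer=True, note='demo')
trainer.load(path, load_optimizer=True)
```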
Inherited Members
- torch.nn.modules.module.Module
- dump_patches
- register_buffer
- register_parameter
- add_module
- register_module
- get_submodule
- get_parameter
- get_buffer
- get_extra_state
- set_extra_state
- apply
- cuda
- xpu
- cpu
- type
- float
- double
- half
- bfloat16
- to_empty
- to
- register_backward_hook
- register_full_backward_hook
- register_forward_pre_hook
- register_forward_hook
- state_dict
- load_state_dict
- parameters
- named_parameters
- buffers
- named_buffers
- children
- named_children
- modules
- named_modules
- train
- requires_grad_
- zero_grad
- extra_repr