在本教程中,您将学习如何使用Detectron2 在自定义数据集上训练一个目标检测网络。我们需要下载一个非coco格式的目标检测数据集
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip # 我们提前下载好,也可通过滴滴云S3下载 # !wget https://dataset-public.s3-internal.didiyunapi.com/DAI教程/datasets/balloon_dataset.zip !unzip balloon_dataset.zip
--2020-04-26 15:14:09-- https://dataset-public.s3-internal.didiyunapi.com/DAI%E6%95%99%E7%A8%8B/datasets/balloon_dataset.zip Resolving dataset-public.s3-internal.didiyunapi.com (dataset-public.s3-internal.didiyunapi.com)... 100.64.8.5 Connecting to dataset-public.s3-internal.didiyunapi.com (dataset-public.s3-internal.didiyunapi.com)|100.64.8.5|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 38741381 (37M) [application/zip] Saving to: ‘balloon_dataset.zip’ 100%[======================================>] 38,741,381 110MB/s in 0.3s 2020-04-26 15:14:09 (110 MB/s) - ‘balloon_dataset.zip’ saved [38741381/38741381]
# Common imports and display helper for the Detectron2 tutorial.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import cv2
import random

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import matplotlib.pyplot as plt


def cv2_imshow(img_bgr):
    """Display a BGR image (OpenCV channel order) inline with matplotlib.

    Args:
        img_bgr: HxWx3 array in BGR order, as returned by ``cv2.imread``.

    Fix vs. the original: open a fresh figure on every call instead of
    mutating the global ``plt.rcParams``. With the old version, calling
    this helper several times inside one cell (as the visualization loops
    below do) drew every image onto the same axes, so only the last image
    was actually shown.
    """
    plt.figure(figsize=(18, 36))
    plt.axis('off')
    # Reverse the channel axis: BGR -> RGB for matplotlib.
    plt.imshow(img_bgr[..., ::-1])
    plt.show()
# Convert the custom VIA annotation format into Detectron2's standard
# dataset-dict format. If the dataset were already standard COCO, all the
# parsing below could be replaced by these three lines:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("my_dataset_train", {}, "json_annotation_train.json", "path/to/image/dir")
# register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")
import os
import json
from detectron2.structures import BoxMode


def get_balloon_dicts(img_dir):
    """Parse the VIA annotation file under ``img_dir`` and return a list of
    Detectron2 dataset dicts (one per image)."""
    with open(os.path.join(img_dir, "via_region_data.json")) as fh:
        raw_annotations = json.load(fh)

    records = []
    for image_id, entry in enumerate(raw_annotations.values()):
        image_path = os.path.join(img_dir, entry["filename"])
        # Read the image only to recover its dimensions.
        h, w = cv2.imread(image_path).shape[:2]

        record = {
            "file_name": image_path,
            "image_id": image_id,
            "height": h,
            "width": w,
        }

        instances = []
        for _, region in entry["regions"].items():
            # The balloon dataset carries no per-region attributes.
            assert not region["region_attributes"]
            shape = region["shape_attributes"]
            xs = shape["all_points_x"]
            ys = shape["all_points_y"]
            # Shift polygon vertices to pixel centers, then flatten to the
            # [x0, y0, x1, y1, ...] layout Detectron2 expects.
            flat_poly = []
            for x, y in zip(xs, ys):
                flat_poly.extend((x + 0.5, y + 0.5))
            instances.append({
                "bbox": [np.min(xs), np.min(ys), np.max(xs), np.max(ys)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [flat_poly],
                "category_id": 0,
                "iscrowd": 0,
            })
        record["annotations"] = instances
        records.append(record)
    return records


from detectron2.data import DatasetCatalog, MetadataCatalog

for split in ["train", "val"]:
    # Bind `split` as a default arg so each lambda keeps its own value.
    DatasetCatalog.register("balloon_" + split,
                            lambda split=split: get_balloon_dicts("balloon/" + split))
    MetadataCatalog.get("balloon_" + split).set(thing_classes=["balloon"])
balloon_metadata = MetadataCatalog.get("balloon_train")
# Sanity-check data loading: visualize three random training samples together
# with their ground-truth annotations.
dataset_dicts = get_balloon_dicts("balloon/train")
for sample in random.sample(dataset_dicts, 3):
    image = cv2.imread(sample["file_name"])
    # Visualizer expects RGB, cv2.imread returns BGR.
    viz = Visualizer(image[:, :, ::-1], metadata=balloon_metadata, scale=0.5)
    rendered = viz.draw_dataset_dict(sample)
    cv2_imshow(rendered.get_image()[:, :, ::-1])
# Fine-tune a COCO-pretrained R50-FPN Mask R-CNN on the balloon dataset.
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 4
# Fix: the original statement assigned cfg.MODEL.WEIGHTS twice
# ("cfg.MODEL.WEIGHTS = cfg.MODEL.WEIGHTS = ..."), an obvious typo.
cfg.MODEL.WEIGHTS = '/home/dc2-user/Detectron2/model_final_b275ba.pkl'  # see step (1) for downloading the pretrained model
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0001    # small LR is sufficient for fine-tuning this toy dataset
cfg.SOLVER.MAX_ITER = 500      # ~5 minutes of training; enough for the balloon set
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # a single foreground class: balloon
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
•[32m[04/26 15:35:02 d2.engine.defaults]: •[0mModel: GeneralizedRCNN( (backbone): FPN( (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (top_block): LastLevelMaxPool() (bottom_up): ResNet( (stem): BasicStem( (conv1): Conv2d( 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) ) (res2): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv1): Conv2d( 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), 
bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) ) (res3): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv1): Conv2d( 256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), 
padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) ) (res4): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv1): Conv2d( 512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (4): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (5): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) ) (res5): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv1): Conv2d( 1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) 
(2): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) ) ) ) (proposal_generator): RPN( (anchor_generator): DefaultAnchorGenerator( (cell_anchors): BufferList() ) (rpn_head): StandardRPNHead( (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1)) (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1)) ) ) (roi_heads): StandardROIHeads( (box_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (box_head): FastRCNNConvFCHead( (fc1): Linear(in_features=12544, out_features=1024, bias=True) (fc2): Linear(in_features=1024, out_features=1024, bias=True) ) (box_predictor): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=2, bias=True) (bbox_pred): Linear(in_features=1024, out_features=4, bias=True) ) (mask_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(14, 14), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(14, 14), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(14, 14), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(14, 14), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (mask_head): 
MaskRCNNConvUpsampleHead( (mask_fcn1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (mask_fcn2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (mask_fcn3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (mask_fcn4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (deconv): ConvTranspose2d(256, 256, kernel_size=(2, 2), stride=(2, 2)) (predictor): Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1)) ) ) ) •[32m[04/26 15:35:04 d2.data.build]: •[0mRemoved 0 images with no usable annotations. 61 images left. •[32m[04/26 15:35:04 d2.data.common]: •[0mSerializing 61 elements to byte tensors and concatenating them all ... •[32m[04/26 15:35:04 d2.data.common]: •[0mSerialized dataset takes 0.17 MiB •[32m[04/26 15:35:04 d2.data.detection_utils]: •[0mTransformGens used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()] •[32m[04/26 15:35:04 d2.data.build]: •[0mUsing training sampler TrainingSampler
'roi_heads.box_predictor.cls_score.weight' has shape (81, 1024) in the checkpoint but (2, 1024) in the model! Skipped. 'roi_heads.box_predictor.cls_score.bias' has shape (81,) in the checkpoint but (2,) in the model! Skipped. 'roi_heads.box_predictor.bbox_pred.weight' has shape (320, 1024) in the checkpoint but (4, 1024) in the model! Skipped. 'roi_heads.box_predictor.bbox_pred.bias' has shape (320,) in the checkpoint but (4,) in the model! Skipped.
•[32m[04/26 15:35:05 d2.engine.train_loop]: •[0mStarting training from iteration 0 •[32m[04/26 15:35:19 d2.utils.events]: •[0m eta: 0:04:26 iter: 19 total_loss: 2.130 loss_cls: 0.735 loss_box_reg: 0.640 loss_mask: 0.690 loss_rpn_cls: 0.031 loss_rpn_loc: 0.007 time: 0.5656 data_time: 0.0406 lr: 0.000002 max_mem: 2724M •[32m[04/26 15:35:31 d2.utils.events]: •[0m eta: 0:04:16 iter: 39 total_loss: 2.083 loss_cls: 0.725 loss_box_reg: 0.633 loss_mask: 0.690 loss_rpn_cls: 0.031 loss_rpn_loc: 0.009 time: 0.5661 data_time: 0.0088 lr: 0.000004 max_mem: 2724M •[32m[04/26 15:35:42 d2.utils.events]: •[0m eta: 0:04:06 iter: 59 total_loss: 2.107 loss_cls: 0.694 loss_box_reg: 0.615 loss_mask: 0.689 loss_rpn_cls: 0.041 loss_rpn_loc: 0.011 time: 0.5671 data_time: 0.0093 lr: 0.000006 max_mem: 2724M •[32m[04/26 15:35:54 d2.utils.events]: •[0m eta: 0:03:58 iter: 79 total_loss: 2.026 loss_cls: 0.656 loss_box_reg: 0.645 loss_mask: 0.686 loss_rpn_cls: 0.032 loss_rpn_loc: 0.011 time: 0.5713 data_time: 0.0089 lr: 0.000008 max_mem: 2724M •[32m[04/26 15:36:05 d2.utils.events]: •[0m eta: 0:03:47 iter: 99 total_loss: 1.989 loss_cls: 0.601 loss_box_reg: 0.637 loss_mask: 0.685 loss_rpn_cls: 0.043 loss_rpn_loc: 0.010 time: 0.5746 data_time: 0.0085 lr: 0.000010 max_mem: 2846M •[32m[04/26 15:36:17 d2.utils.events]: •[0m eta: 0:03:37 iter: 119 total_loss: 1.952 loss_cls: 0.551 loss_box_reg: 0.595 loss_mask: 0.682 loss_rpn_cls: 0.033 loss_rpn_loc: 0.010 time: 0.5761 data_time: 0.0086 lr: 0.000012 max_mem: 2846M •[32m[04/26 15:36:29 d2.utils.events]: •[0m eta: 0:03:29 iter: 139 total_loss: 1.828 loss_cls: 0.518 loss_box_reg: 0.586 loss_mask: 0.678 loss_rpn_cls: 0.021 loss_rpn_loc: 0.005 time: 0.5791 data_time: 0.0085 lr: 0.000014 max_mem: 2846M •[32m[04/26 15:36:41 d2.utils.events]: •[0m eta: 0:03:18 iter: 159 total_loss: 1.868 loss_cls: 0.484 loss_box_reg: 0.709 loss_mask: 0.675 loss_rpn_cls: 0.034 loss_rpn_loc: 0.010 time: 0.5804 data_time: 0.0087 lr: 0.000016 max_mem: 2846M •[32m[04/26 15:36:53 
d2.utils.events]: •[0m eta: 0:03:06 iter: 179 total_loss: 1.836 loss_cls: 0.443 loss_box_reg: 0.653 loss_mask: 0.672 loss_rpn_cls: 0.034 loss_rpn_loc: 0.008 time: 0.5808 data_time: 0.0091 lr: 0.000018 max_mem: 2846M •[32m[04/26 15:37:04 d2.utils.events]: •[0m eta: 0:02:55 iter: 199 total_loss: 1.824 loss_cls: 0.423 loss_box_reg: 0.694 loss_mask: 0.666 loss_rpn_cls: 0.020 loss_rpn_loc: 0.006 time: 0.5814 data_time: 0.0090 lr: 0.000020 max_mem: 2846M •[32m[04/26 15:37:16 d2.utils.events]: •[0m eta: 0:02:43 iter: 219 total_loss: 1.597 loss_cls: 0.379 loss_box_reg: 0.504 loss_mask: 0.663 loss_rpn_cls: 0.038 loss_rpn_loc: 0.007 time: 0.5816 data_time: 0.0092 lr: 0.000022 max_mem: 2846M •[32m[04/26 15:37:28 d2.utils.events]: •[0m eta: 0:02:32 iter: 239 total_loss: 1.698 loss_cls: 0.362 loss_box_reg: 0.645 loss_mask: 0.656 loss_rpn_cls: 0.028 loss_rpn_loc: 0.008 time: 0.5815 data_time: 0.0082 lr: 0.000024 max_mem: 2846M •[32m[04/26 15:37:40 d2.utils.events]: •[0m eta: 0:02:20 iter: 259 total_loss: 1.756 loss_cls: 0.357 loss_box_reg: 0.687 loss_mask: 0.655 loss_rpn_cls: 0.021 loss_rpn_loc: 0.011 time: 0.5831 data_time: 0.0083 lr: 0.000026 max_mem: 2846M •[32m[04/26 15:37:52 d2.utils.events]: •[0m eta: 0:02:09 iter: 279 total_loss: 1.640 loss_cls: 0.326 loss_box_reg: 0.639 loss_mask: 0.653 loss_rpn_cls: 0.034 loss_rpn_loc: 0.009 time: 0.5843 data_time: 0.0084 lr: 0.000028 max_mem: 2846M •[32m[04/26 15:38:04 d2.utils.events]: •[0m eta: 0:01:57 iter: 299 total_loss: 1.611 loss_cls: 0.305 loss_box_reg: 0.649 loss_mask: 0.640 loss_rpn_cls: 0.023 loss_rpn_loc: 0.007 time: 0.5852 data_time: 0.0091 lr: 0.000030 max_mem: 2846M •[32m[04/26 15:38:15 d2.utils.events]: •[0m eta: 0:01:46 iter: 319 total_loss: 1.533 loss_cls: 0.285 loss_box_reg: 0.576 loss_mask: 0.624 loss_rpn_cls: 0.026 loss_rpn_loc: 0.007 time: 0.5846 data_time: 0.0085 lr: 0.000032 max_mem: 2846M •[32m[04/26 15:38:27 d2.utils.events]: •[0m eta: 0:01:34 iter: 339 total_loss: 1.631 loss_cls: 0.287 loss_box_reg: 0.661 
loss_mask: 0.643 loss_rpn_cls: 0.023 loss_rpn_loc: 0.011 time: 0.5858 data_time: 0.0082 lr: 0.000034 max_mem: 2846M •[32m[04/26 15:38:39 d2.utils.events]: •[0m eta: 0:01:22 iter: 359 total_loss: 1.524 loss_cls: 0.235 loss_box_reg: 0.603 loss_mask: 0.622 loss_rpn_cls: 0.020 loss_rpn_loc: 0.007 time: 0.5854 data_time: 0.0085 lr: 0.000036 max_mem: 2846M •[32m[04/26 15:38:51 d2.utils.events]: •[0m eta: 0:01:11 iter: 379 total_loss: 1.538 loss_cls: 0.235 loss_box_reg: 0.659 loss_mask: 0.623 loss_rpn_cls: 0.020 loss_rpn_loc: 0.010 time: 0.5856 data_time: 0.0085 lr: 0.000038 max_mem: 2846M •[32m[04/26 15:39:03 d2.utils.events]: •[0m eta: 0:00:59 iter: 399 total_loss: 1.414 loss_cls: 0.202 loss_box_reg: 0.573 loss_mask: 0.609 loss_rpn_cls: 0.016 loss_rpn_loc: 0.004 time: 0.5861 data_time: 0.0087 lr: 0.000040 max_mem: 2846M •[32m[04/26 15:39:15 d2.utils.events]: •[0m eta: 0:00:47 iter: 419 total_loss: 1.478 loss_cls: 0.216 loss_box_reg: 0.579 loss_mask: 0.611 loss_rpn_cls: 0.026 loss_rpn_loc: 0.011 time: 0.5863 data_time: 0.0091 lr: 0.000042 max_mem: 2935M •[32m[04/26 15:39:27 d2.utils.events]: •[0m eta: 0:00:35 iter: 439 total_loss: 1.341 loss_cls: 0.180 loss_box_reg: 0.514 loss_mask: 0.595 loss_rpn_cls: 0.024 loss_rpn_loc: 0.006 time: 0.5870 data_time: 0.0085 lr: 0.000044 max_mem: 2935M •[32m[04/26 15:39:39 d2.utils.events]: •[0m eta: 0:00:24 iter: 459 total_loss: 1.420 loss_cls: 0.186 loss_box_reg: 0.582 loss_mask: 0.599 loss_rpn_cls: 0.024 loss_rpn_loc: 0.008 time: 0.5873 data_time: 0.0084 lr: 0.000046 max_mem: 2935M •[32m[04/26 15:39:51 d2.utils.events]: •[0m eta: 0:00:12 iter: 479 total_loss: 1.345 loss_cls: 0.169 loss_box_reg: 0.522 loss_mask: 0.601 loss_rpn_cls: 0.020 loss_rpn_loc: 0.007 time: 0.5877 data_time: 0.0084 lr: 0.000048 max_mem: 2935M •[32m[04/26 15:40:03 d2.utils.events]: •[0m eta: 0:00:00 iter: 499 total_loss: 1.215 loss_cls: 0.143 loss_box_reg: 0.487 loss_mask: 0.564 loss_rpn_cls: 0.016 loss_rpn_loc: 0.006 time: 0.5887 data_time: 0.0087 lr: 0.000050 
max_mem: 2935M •[32m[04/26 15:40:05 d2.engine.hooks]: •[0mOverall training speed: 497 iterations in 0:04:53 (0.5900 s / it) •[32m[04/26 15:40:05 d2.engine.hooks]: •[0mTotal training time: 0:04:55 (0:00:02 on hooks)
# Qualitative evaluation: run the fine-tuned model on random validation images.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # only show confident detections
cfg.DATASETS.TEST = ("balloon_val", )
predictor = DefaultPredictor(cfg)

from detectron2.utils.visualizer import ColorMode

dataset_dicts = get_balloon_dicts("balloon/val")
for sample in random.sample(dataset_dicts, 3):
    image = cv2.imread(sample["file_name"])
    predictions = predictor(image)
    viz = Visualizer(image[:, :, ::-1],
                     metadata=balloon_metadata,
                     scale=0.8,
                     instance_mode=ColorMode.IMAGE_BW)  # grey out pixels outside the masks
    viz = viz.draw_instance_predictions(predictions["instances"].to("cpu"))
    cv2_imshow(viz.get_image()[:, :, ::-1])