Referring to the model in TORCHVISION.MODELS.DETECTION.MASK_RCNN, there are many settings that can be configured before fine-tuning – https://pytorch.org/vision/0.8/_modules/torchvision/models/detection/mask_rcnn.html
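For instance (a minimal sketch, not taken from the reference above; the keyword arguments are standard MaskRCNN/FasterRCNN constructor parameters and the values are only illustrative), several of those settings can be passed directly when the model is built:

import torchvision

# Illustrative values only, not recommendations.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(
    pretrained=True,             # load COCO-pretrained weights
    min_size=800,                # images are rescaled so the shorter side is at least min_size
    max_size=1333,               # ... and the longer side is at most max_size
    box_score_thresh=0.05,       # minimum classification score to keep a detection
    box_nms_thresh=0.5,          # NMS IoU threshold for the box head
    box_detections_per_img=100,  # maximum number of detections returned per image
)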
For example, with the following approach (ref: https://www.kaggle.com/code/abhishek/inference-for-mask-rcnn):
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

num_classes = 10
model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

# replace the box predictor head
in_features = model_ft.roi_heads.box_predictor.cls_score.in_features
model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# replace the mask predictor head
in_features_mask = model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
Or with one of the following approaches (ref: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html).
First approach
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
Second approach
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(weights="DEFAULT").features
# ``FasterRCNN`` needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),)
)

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# ``OrderedDict[Tensor]``, and in ``featmap_names`` you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2
)

# put the pieces together inside a Faster-RCNN model
model = FasterRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler
)
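As a quick sanity check (a minimal sketch, assuming the model assembled above), the custom-backbone detector can be run on dummy images in eval mode to confirm the pieces fit together:

import torch

model.eval()
dummy_images = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
with torch.no_grad():
    outputs = model(dummy_images)
# each output is a dict with 'boxes', 'labels' and 'scores' tensors
print(outputs[0].keys())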
Meanwhile, to build the full instance segmentation model (which you can then use for inference), something like the following can be used:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes
    )

    return model
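A minimal inference sketch, assuming a (fine-tuned) model returned by get_model_instance_segmentation and an image tensor scaled to [0, 1]; the 0.5 thresholds are only illustrative choices:

import torch

model = get_model_instance_segmentation(num_classes=2)
model.eval()  # inference mode: no targets required

# image: a 3xHxW float tensor with values in [0, 1] (placeholder)
image = torch.rand(3, 480, 640)

with torch.no_grad():
    prediction = model([image])[0]

# prediction is a dict with 'boxes', 'labels', 'scores' and 'masks';
# 'masks' has shape [N, 1, H, W] with per-pixel probabilities
keep = prediction['scores'] > 0.5           # simple score threshold (assumption)
masks = prediction['masks'][keep] > 0.5     # binarize the soft masks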
Or using the following approach: https://github.com/Jaykumaran/Medical_Imaging_Referenced/blob/main/Nuclei_Instance_Segmentation/Torchvision_MaskRCNN.ipynb
import torch.nn as nn
import torchvision

from class_names import INSTANCE_CATEGORY_NAMES as class_names

model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(pretrained=False)

# resize the classification, box-regression and mask heads to the new class count
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features=1024, out_features=len(class_names), bias=True)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features=1024, out_features=len(class_names) * 4, bias=True)
model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(256, len(class_names), kernel_size=(1, 1), stride=(1, 1))
Another option is this reference: https://colab.research.google.com/github/cj-mills/pytorch-mask-rcnn-tutorial-code/blob/main/notebooks/pytorch-mask-r-cnn-training-colab.ipynb
# Import Mask R-CNN
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

class_names = ['background', 'person']

# Initialize a Mask R-CNN model with pretrained weights
model = maskrcnn_resnet50_fpn_v2(weights='DEFAULT')

# Get the number of input features for the classifier
in_features_box = model.roi_heads.box_predictor.cls_score.in_features
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels

# Get the number of output channels for the Mask Predictor
dim_reduced = model.roi_heads.mask_predictor.conv5_mask.out_channels

# Replace the box predictor
model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_features_box, num_classes=len(class_names))

# Replace the mask predictor
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_channels=in_features_mask,
                                                   dim_reduced=dim_reduced,
                                                   num_classes=len(class_names))

# Set the model's device and data type
# (``device`` and ``dtype`` are assumed to be defined earlier in the notebook)
model.to(device=device, dtype=dtype);

# Add attributes to store the device and model name for later reference
model.device = device
model.name = 'maskrcnn_resnet50_fpn_v2'
Or using https://www.kaggle.com/code/suraj520/maskrcnn-iou-0-79-0-31-know-train-infer?scriptVersionId=136698261
import torchvision
from torchvision import datasets, models

# bbox and mask predictors
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def SegmentationModel(num_classes):
    # loading instance seg pretrained model
    model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(pretrained=True)

    # replacing pretrained head with new
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now doing the same for input features for mask
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 512
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)

    return model
For more advanced control, the following input arguments can be used:
import torch.nn as nn
import torchvision

# ``class_names`` and ``device`` are assumed to be defined earlier
model = torchvision.models.get_model(
    'maskrcnn_resnet50_fpn_v2',
    weights='MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1',
    # num_classes = len(class_names),
    box_detections_per_img=1000,
    min_size=600,
    max_size=2000
)

# resize the heads to the new class count
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features=1024, out_features=len(class_names), bias=True)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features=1024, out_features=len(class_names) * 4, bias=True)
model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(256, len(class_names), kernel_size=(1, 1), stride=(1, 1))

model.to(device)  # move model to the right device
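Note that num_classes is commented out, presumably because the COCO_V1 weights expect the default COCO-sized heads, which is why the predictor layers are replaced manually afterwards. A quick check (a minimal sketch) that the replaced heads now match len(class_names):

# verify that the new heads match the number of classes
print(model.roi_heads.box_predictor.cls_score)         # Linear(1024 -> len(class_names))
print(model.roi_heads.box_predictor.bbox_pred)         # Linear(1024 -> len(class_names) * 4)
print(model.roi_heads.mask_predictor.mask_fcn_logits)  # Conv2d(256 -> len(class_names))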