Referring to the models in TORCHVISION.MODELS.DETECTION.MASK_RCNN, there are many settings to configure before fine-tuning – https://pytorch.org/vision/0.8/_modules/torchvision/models/detection/mask_rcnn.html
For example, with the following approach, ref: https://www.kaggle.com/code/abhishek/inference-for-mask-rcnn
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

num_classes = 10
# load a Mask R-CNN model pre-trained on COCO
model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# replace the box predictor head with one sized for num_classes
in_features = model_ft.roi_heads.box_predictor.cls_score.in_features
model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# replace the mask predictor head as well
in_features_mask = model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
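Note that pretrained=True is the older torchvision argument; from torchvision 0.13 onward the equivalent call uses the weights argument instead:

# equivalent call with the newer weights API (torchvision >= 0.13)
model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")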
or with an approach like the following, ref: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
First approach
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
Second approach
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(weights="DEFAULT").features
# ``FasterRCNN`` needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280
# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),)
)
# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# ``OrderedDict[Tensor]``, and in ``featmap_names`` you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2
)
# put the pieces together inside a Faster-RCNN model
model = FasterRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler
)
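As a quick sanity check for the assembled model (a minimal sketch, not part of the referenced tutorial), you can push a dummy image through it in eval mode; the detector returns one dict per image with boxes, labels, and scores:

import torch

model.eval()
with torch.no_grad():
    dummy = [torch.rand(3, 300, 400)]   # list of CHW tensors with values in [0, 1]
    predictions = model(dummy)          # one dict per image: boxes, labels, scores
print(predictions[0]["boxes"].shape)    # -> torch.Size([N, 4])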
Meanwhile, the model used for inference can be built with a helper like the following:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes
    )

    return model
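A minimal inference sketch on top of this helper (assuming a fine-tuned 2-class model and a CHW float image in [0, 1]; the 0.5 score threshold is an arbitrary choice):

import torch

model = get_model_instance_segmentation(num_classes=2)
model.eval()
with torch.no_grad():
    img = torch.rand(3, 480, 640)            # placeholder image; replace with real data
    output = model([img])[0]                 # dict with boxes, labels, scores, masks
keep = output["scores"] > 0.5                # simple confidence threshold
boxes, masks = output["boxes"][keep], output["masks"][keep]   # masks: [N, 1, H, W]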
Or use the following approach: https://github.com/Jaykumaran/Medical_Imaging_Referenced/blob/main/Nuclei_Instance_Segmentation/Torchvision_MaskRCNN.ipynb
import torchvision
import torch.nn as nn
from class_names import INSTANCE_CATEGORY_NAMES as class_names

# build the architecture without pre-trained COCO weights
model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(pretrained=False)
# resize the classification, box-regression, and mask heads to len(class_names)
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features=1024, out_features=len(class_names), bias=True)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features=1024, out_features=len(class_names)*4, bias=True)
model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(256, len(class_names), kernel_size=(1, 1), stride=(1, 1))
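The hard-coded 1024 and 256 above are specific to maskrcnn_resnet50_fpn_v2; as an alternative sketch, the sizes can be read off the existing heads so the replacement keeps working if the builder changes:

in_features_box = model.roi_heads.box_predictor.cls_score.in_features          # 1024 for this model
in_channels_mask = model.roi_heads.mask_predictor.mask_fcn_logits.in_channels  # 256 for this model
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features_box, len(class_names))
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features_box, len(class_names) * 4)
model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(in_channels_mask, len(class_names), kernel_size=1)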
You can also use this reference: https://colab.research.google.com/github/cj-mills/pytorch-mask-rcnn-tutorial-code/blob/main/notebooks/pytorch-mask-r-cnn-training-colab.ipynb
# Import Mask R-CNN
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

class_names = ['background', 'person']

# Initialize a Mask R-CNN model with pretrained weights
model = maskrcnn_resnet50_fpn_v2(weights='DEFAULT')

# Get the number of input features for the classifier
in_features_box = model.roi_heads.box_predictor.cls_score.in_features
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels

# Get the number of output channels for the Mask Predictor
dim_reduced = model.roi_heads.mask_predictor.conv5_mask.out_channels

# Replace the box predictor
model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_features_box, num_classes=len(class_names))

# Replace the mask predictor
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_channels=in_features_mask, dim_reduced=dim_reduced, num_classes=len(class_names))

# Set the model's device and data type (device and dtype are defined earlier in the referenced notebook)
model.to(device=device, dtype=dtype)

# Add attributes to store the device and model name for later reference
model.device = device
model.name = 'maskrcnn_resnet50_fpn_v2'
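device and dtype come from earlier cells of the referenced notebook; a minimal stand-in so the snippet runs on its own could look like this (the float32 dtype is an assumption):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float32   # assumed; the notebook may choose a different precision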
or use https://www.kaggle.com/code/suraj520/maskrcnn-iou-0-79-0-31-know-train-infer?scriptVersionId=136698261
import torchvision
from torchvision import datasets, models
# bbox and mask predictors
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def SegmentationModel(num_classes):
    # load the instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained box head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now do the same for the mask head input features
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 512
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
    return model
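All of the variants above are trained the same way: in train mode the model takes a list of images plus a list of target dicts and returns a dict of losses. A minimal training-step sketch with dummy data (the optimizer hyperparameters follow the torchvision tutorial's typical values):

import torch

model = SegmentationModel(num_classes=2)
model.train()
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

images = [torch.rand(3, 256, 256)]                            # list of CHW float images
targets = [{
    "boxes": torch.tensor([[30.0, 40.0, 120.0, 150.0]]),      # ground-truth boxes (x1, y1, x2, y2)
    "labels": torch.tensor([1], dtype=torch.int64),           # class index (0 is background)
    "masks": torch.zeros((1, 256, 256), dtype=torch.uint8),   # one binary mask per box
}]

loss_dict = model(images, targets)   # loss_classifier, loss_box_reg, loss_mask, ...
loss = sum(loss_dict.values())
optimizer.zero_grad()
loss.backward()
optimizer.step()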
For more advanced configuration, the model builder also accepts input arguments like the following:
import torchvision
import torch.nn as nn

# class_names and device are assumed to be defined earlier
model = torchvision.models.get_model(
    'maskrcnn_resnet50_fpn_v2',
    weights='MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1',
    # num_classes = len(class_names),
    box_detections_per_img=1000,   # allow up to 1000 detections per image
    min_size=600,                  # images are resized so the shorter side is 600 px
    max_size=2000                  # while the longer side never exceeds 2000 px
)
# resize the classification, box-regression, and mask heads to len(class_names)
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features=1024, out_features=len(class_names), bias=True)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features=1024, out_features=len(class_names)*4, bias=True)
model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(256, len(class_names), kernel_size=(1, 1), stride=(1, 1))
model.to(device)  # move the model to the right device
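As a quick follow-up (a sketch, with placeholder paths), you can inspect the internal transform to confirm that min_size/max_size took effect, then save and reload the fine-tuned weights:

import torch

print(model.transform)   # GeneralizedRCNNTransform showing the configured Resize(min_size, max_size)
torch.save(model.state_dict(), "maskrcnn_finetuned.pth")                          # placeholder path
model.load_state_dict(torch.load("maskrcnn_finetuned.pth", map_location=device))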