[ML] Object Detection & Segmentation (3)
📌 Top Instance Segmentation Models
Google Drive in Colab
- Set the path (move to the gdrive/MyDrive directory).
from google.colab import drive
drive.mount("/content/gdrive")
- Download the model you want to clone -> it is cloned into MyDrive.
!git clone https://github.com/facebookresearch/detectron2.git
- Set the working directory with os
import os
os.chdir("/content/gdrive/MyDrive/detectron2")
Downloading the COCO dataset
!pip install CocoDataset==0.1.2
!wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip
!unzip /content/gdrive/MyDrive/annotations_trainval2014.zip
from coco_dataset import coco_dataset_download as cocod
class_name='person' #class name
images_count=50 #count of images
annotations_path='/content/gdrive/MyDrive/annotations/instances_train2014.json' #path of coco dataset annotations
#call download function
cocod.coco_dataset_download(class_name,images_count,annotations_path)
Zip path: /content/gdrive/MyDrive/annotations_trainval2014.zip
Image path: /content/gdrive/MyDrive/person/{image_name}
detectron2_refactor
Modified so that masks are painted over the image in random colours.
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

def load_model(threshold):
    # Create and set the model config
    cfg = get_cfg()
    cfg.merge_from_file("/content/gdrive/MyDrive/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    # Create the predictor
    predictor = DefaultPredictor(cfg)
    return predictor
def get_predictor(img_path, threshold):
    # Load the image (np.array)
    img = cv2.imread(img_path)
    # Build a predictor and run inference
    predictor = load_model(threshold)
    outputs = predictor(img)
    return outputs
def random_colour_masks(image):
    # 11 predefined colours for visualizing masks
    colours = [[0, 255, 0], [0, 0, 255], [255, 0, 0], [0, 255, 255], [255, 255, 0], [255, 0, 255], [80, 70, 180], [250, 80, 190], [245, 145, 50], [70, 150, 250], [50, 190, 190]]
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    # Paint the mask region with a randomly chosen colour
    r[image == 1], g[image == 1], b[image == 1] = colours[random.randrange(0, len(colours))]
    coloured_mask = np.stack([r, g, b], axis=2)
    return coloured_mask
def instance_segmentation_api(img_path, threshold):
    img = cv2.imread(img_path)  # BGR, which detectron2's default config expects
    predictor = load_model(threshold)
    outputs = predictor(img)
    # Tensors: gpu -> cpu
    outputs['instances'] = outputs['instances'].to('cpu')
    masks = outputs['instances'].pred_masks
    # obj_mask = np.sum(np.asarray(masks.unsqueeze(-1)), 0)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR -> RGB so matplotlib shows the right colours
    for i in range(len(masks)):
        rgb_mask = random_colour_masks(masks[i].numpy())  # boolean mask tensor -> NumPy array
        img = cv2.addWeighted(img, 1, rgb_mask, 0.5, 0)
    plt.figure(figsize=(20, 20))
    plt.imshow(img)
    plt.xticks([])
    plt.yticks([])
    plt.show()
Model Input Format
- image: a Tensor in (C, H, W) format
- height, width: the desired height and width of the output
- instances: an Instances object used during training (gt_boxes/gt_classes/gt_masks/gt_keypoints)
- sem_seg: the ground truth for semantic segmentation
- proposals: precomputed proposals used by some models (a raw-input sketch follows this list)
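To make the format concrete, here is a minimal sketch of feeding this input dict to the raw model, assuming cfg is built as in load_model above and img is an HWC BGR NumPy array; DefaultPredictor does all of this internally.
import torch
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer

model = build_model(cfg)  # raw nn.Module, unlike DefaultPredictor
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()
with torch.no_grad():
    # "image" must be a (C, H, W) tensor in cfg.INPUT.FORMAT (BGR by default)
    image = torch.as_tensor(img.transpose(2, 0, 1).copy())
    inputs = [{"image": image, "height": img.shape[0], "width": img.shape[1]}]
    outputs = model(inputs)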
Model Output Format
- instances: holds pred_boxes/scores/pred_classes/pred_masks/pred_keypoints.
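A sketch of reading those fields from the predictor output used earlier:
instances = outputs['instances'].to('cpu')
boxes = instances.pred_boxes.tensor    # (N, 4) box coordinates in XYXY format
scores = instances.scores              # (N,) confidence scores
classes = instances.pred_classes       # (N,) predicted class indices
masks = instances.pred_masks           # (N, H, W) boolean masks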
Varying the threshold
Result images for score thresholds from 0.1 through 0.9.
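The sweep itself is a small loop; a sketch, with a hypothetical image path:
# Visualize the same image at each score threshold (path is hypothetical)
for t in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    instance_segmentation_api('/content/gdrive/MyDrive/person/sample.jpg', t)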
Mask R-CNN with torchvision
- Train Mask R-CNN for Image Segmentation -> I'd love to try this, but the labeling is such a pain, haha.
Input and Output
- input: a list of n image tensors, each of shape (c, h, w); the image sizes need not be fixed (a dummy-input sketch follows the model loading below)
- n: number of images
- c: number of channels (RGB = 3)
- h: height of the image
- w: width of the image
- output
- coordinates of the bounding boxes
- labels of the predicted classes, along with the scores of those labels
- the masks
Load Model
# Model loading
import torchvision
from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights

# Mask R-CNN pretrained on COCO (weights= is the current replacement for pretrained=True)
model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT)
model.eval()
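As the input list above notes, image sizes need not match; a quick sketch with dummy tensors (not from the experiments above):
import torch
# Two dummy RGB images of different sizes; the model accepts a list of (c, h, w) tensors
imgs = [torch.rand(3, 300, 400), torch.rand(3, 500, 600)]
with torch.no_grad():
    preds = model(imgs)
print(preds[0].keys())  # dict_keys(['boxes', 'labels', 'scores', 'masks'])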
def get_prediction
from PIL import Image
import torchvision.transforms as T

def get_prediction(img_path, threshold):
    img = Image.open(img_path)
    # Convert the PIL image to a tensor with torchvision's transforms
    transform = T.Compose([T.ToTensor()])
    img = transform(img)
    # Pass the image to the model (it expects a list of tensors)
    pred = model([img])
    # Build the return values: masks, pred_boxes, pred_class
    pred_score = list(pred[0]['scores'].detach().numpy())
    # Scores come sorted in descending order; find the last index above the threshold
    pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1]
    masks = (pred[0]['masks'] > 0.5).squeeze().detach().cpu().numpy()
    # COCO_INSTANCE_CATEGORY_NAMES: the standard list of COCO class names (defined elsewhere)
    pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
    pred_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(pred[0]['boxes'].detach().numpy())]
    # Keep only the detections whose score exceeds the threshold
    masks = masks[:pred_t+1]
    pred_boxes = pred_boxes[:pred_t+1]
    pred_class = pred_class[:pred_t+1]
    return masks, pred_boxes, pred_class
- pred structure: a list of dicts, [{key: value}], with keys:
- boxes
- labels
- scores
- masks
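For reference, the shapes these keys carry for a single input image with N detections:
pred[0]['boxes'].shape   # (N, 4): [x1, y1, x2, y2]
pred[0]['labels'].shape  # (N,): class indices
pred[0]['scores'].shape  # (N,): sorted in descending order
pred[0]['masks'].shape   # (N, 1, H, W): soft masks in [0, 1]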
def random_colour_masks
# Each predicted object's mask is painted with a random colour from a set of 11 predefined colours to visualize the masks
def random_colour_masks(image):
    # 11 predefined colours for visualizing masks
    colours = [[0, 255, 0], [0, 0, 255], [255, 0, 0], [0, 255, 255], [255, 255, 0], [255, 0, 255], [80, 70, 180], [250, 80, 190], [245, 145, 50], [70, 150, 250], [50, 190, 190]]
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    # Paint the mask region with a randomly chosen colour
    r[image == 1], g[image == 1], b[image == 1] = colours[random.randrange(0, len(colours))]
    coloured_mask = np.stack([r, g, b], axis=2)
    return coloured_mask
Picks a random colour for each mask.
def instance_segmentation_api
# instance_segmentation
def instance_segmentation_api(img_path, threshold=0.5, rect_th=1, text_size=0.4, text_th=1):
    masks, boxes, pred_cls = get_prediction(img_path, threshold)
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    for i in range(len(masks)):
        # Overlay each mask in a random colour, then draw its box and class label
        rgb_mask = random_colour_masks(masks[i])
        img = cv2.addWeighted(img, 1, rgb_mask, 0.5, 0)
        c1, c2 = (int(boxes[i][0][0]), int(boxes[i][0][1])), (int(boxes[i][1][0]), int(boxes[i][1][1]))
        cv2.rectangle(img, c1, c2, color=(0, 255, 0), thickness=rect_th)
        cv2.putText(img, pred_cls[i], c1, cv2.FONT_HERSHEY_SIMPLEX, text_size, (0, 255, 0), thickness=text_th)
    plt.figure(figsize=(20, 20))
    plt.imshow(img)
    plt.xticks([])
    plt.yticks([])
    plt.show()
A function that displays the final result.
The performance seems pretty decent...
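For reference, a call sketch (the image path is hypothetical):
# Hypothetical image path; tweak the drawing parameters as needed
instance_segmentation_api('/content/gdrive/MyDrive/person/sample.jpg', threshold=0.75, rect_th=2, text_size=0.6, text_th=2)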
YOLOv8
I tried two models based on YOLO version 8, but I couldn't figure out how to visualize either of them... one does visualize, but only for certain images, and the other outputs a tensor, but I don't know how to turn that into an image...
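For future reference, a minimal sketch of one common way to render YOLOv8 segmentation results with the ultralytics package; this is an assumption, not the exact models tried above, and 'sample.jpg' is a hypothetical path.
from ultralytics import YOLO
import matplotlib.pyplot as plt

model = YOLO('yolov8n-seg.pt')   # small segmentation checkpoint
results = model('sample.jpg')    # hypothetical image path
annotated = results[0].plot()    # BGR NumPy array with boxes and masks drawn
plt.imshow(annotated[:, :, ::-1])  # BGR -> RGB for matplotlib
plt.axis('off')
plt.show()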