Python中roi_data_layer.roidb的中文标题和数据层生成方法。
发布时间:2024-01-02 13:22:44
roi_data_layer.roidb中文标题:感兴趣区域(ROI)数据层 roidb
数据层生成方法:在Python中,可以使用RPN(Region Proposal Network)生成的候选框,或者直接使用标注的目标框(ground truth),从图像中提取ROI(Region of Interest)并生成roidb。
下面是使用RPN生成roidb的示例代码:
import numpy as np
import cv2
import tensorflow as tf
from lib.model.config import cfg
from lib.model.rpn.bbox_transform import bbox_transform_inv, clip_boxes
from lib.model.nms_wrapper import nms
# Build RPN proposals from the RPN classification and box-regression outputs.
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchor_scales):
    """Decode, clip, threshold and rank RPN proposals.

    The RPN outputs are flattened to one row per anchor, decoded against the
    anchors with ``bbox_transform_inv``, clipped with ``clip_boxes``, filtered
    by ``cfg.RPN_CONF_THRESH`` and truncated to the
    ``cfg.RPN_POST_NMS_TOP_N`` best-scoring rows.

    Args:
        rpn_cls_prob: Rank-4 tensor of RPN class probabilities. Its static
            shape is unpacked as ``(_, _, height, width)`` and it is reshaped
            to ``[1, height, width, num_anchors, 2]``.
        rpn_bbox_pred: RPN box-regression output, reshaped to
            ``[1, height, width, num_anchors, 4]``.
        im_info: Indexed as ``im_info[0][0]`` (height) and ``im_info[0][1]``
            (width).
        cfg_key: Not used by this function; kept for interface compatibility.
        _feat_stride: Sequence whose first element is the feature stride.
        anchor_scales: Scales forwarded to ``generate_anchors``.

    Returns:
        A tensor with one row per kept proposal: the proposal columns
        followed by the score, ordered by descending score, with at most
        ``cfg.RPN_POST_NMS_TOP_N`` rows.
    """
    # NOTE(review): generate_anchors is not imported in the visible part of
    # this file - confirm which module provides it.
    anchors = generate_anchors(scales=np.array(anchor_scales))
    num_anchors = anchors.shape[0]

    # NOTE(review): the shape is unpacked as if the tensor were laid out
    # (N, C, H, W) while the reshapes below treat it as (1, H, W, A, k) -
    # confirm the actual layout of the RPN outputs.
    _, _, grid_h, grid_w = rpn_cls_prob.shape

    # tf.transpose needs one permutation entry per axis; the reshaped tensors
    # are rank 5, so the trailing (class / coordinate) axis is kept in place
    # while the anchor axis is moved ahead of the spatial axes.
    anchor_first = [0, 3, 1, 2, 4]

    rpn_cls_prob_reshape = tf.reshape(rpn_cls_prob, [1, grid_h, grid_w, num_anchors, 2])
    rpn_cls_prob_reshape = tf.transpose(rpn_cls_prob_reshape, anchor_first)
    rpn_cls_prob_reshape = tf.reshape(rpn_cls_prob_reshape, [-1, 2])

    rpn_bbox_pred_reshape = tf.reshape(rpn_bbox_pred, [1, grid_h, grid_w, num_anchors, 4])
    rpn_bbox_pred_reshape = tf.transpose(rpn_bbox_pred_reshape, anchor_first)
    rpn_bbox_pred_reshape = tf.reshape(rpn_bbox_pred_reshape, [-1, 4])

    # Image size divided by the feature stride, rounded up.
    clip_h = tf.cast(im_info[0][0], tf.float32)
    clip_w = tf.cast(im_info[0][1], tf.float32)
    clip_h = tf.cast(tf.ceil(clip_h / _feat_stride[0]), tf.int32)
    clip_w = tf.cast(tf.ceil(clip_w / _feat_stride[0]), tf.int32)

    # NOTE(review): only the base anchors are passed here; they are not tiled
    # over the H x W grid, so their row count presumably differs from the
    # deltas' - confirm against bbox_transform_inv.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred_reshape)
    # NOTE(review): the clip bounds are the image size divided by the stride;
    # if proposals are expressed in image pixels this should probably be the
    # image size itself - confirm.
    proposals = clip_boxes(proposals, [clip_h, clip_w])

    # Keep only proposals whose second class column exceeds the threshold.
    scores = rpn_cls_prob_reshape[:, 1]
    # tf.where returns [N, 1] coordinates for a 1-D condition; flatten them so
    # the gathers below do not gain an extra axis.
    keep = tf.reshape(tf.where(tf.greater(scores, cfg.RPN_CONF_THRESH)), [-1])
    keep = tf.cast(keep, tf.int32)
    scores = tf.gather(scores, keep)
    proposals = tf.gather(proposals, keep)
    proposals = tf.concat([proposals, tf.expand_dims(scores, 1)], axis=1)

    # Select the best-scoring rows. The rows themselves are gathered: stacking
    # top_k's float values next to its int32 indices cannot be concatenated
    # and would not contain any box coordinates.
    # NOTE(review): no NMS is applied although `nms` is imported at file level
    # - confirm whether suppression is expected here.
    num_proposals = tf.minimum(tf.shape(proposals)[0], cfg.RPN_POST_NMS_TOP_N)
    best = tf.nn.top_k(proposals[:, 4], k=num_proposals)
    return tf.gather(proposals, best.indices)
# Build the roidb: one dict of image metadata and overlap bookkeeping per image.
def get_roidb(imdb, proposals=None):
    """Build a roidb (list of per-image dicts) from an imdb and proposals.

    Args:
        imdb: Dataset object. This function reads ``image_index``,
            ``image_path_from_index``, ``image_widths``, ``image_heights``,
            ``num_classes``, ``gt_boxes``, ``gt_classes`` and
            ``config['rpn_clobber_positives']``.
        proposals: Array with at least four columns; the first four are
            passed to ``bbox_overlaps`` as candidate boxes. The same array is
            used for every image. Required whenever the imdb has images.

    Returns:
        A list with one dict per image holding the keys ``image``, ``width``,
        ``height``, ``flipped``, ``gt_overlaps``, ``max_classes``,
        ``max_overlaps``, ``bbox_targets``, ``bbox_inside_weights`` and
        ``bbox_outside_weights``.

    Raises:
        ValueError: If the imdb has images but ``proposals`` is ``None``.
    """
    roidb = []
    num_images = len(imdb.image_index)
    if num_images == 0:
        return roidb
    if proposals is None:
        # The body previously read an undefined name `proposals`; it is now
        # an explicit argument.
        raise ValueError(
            "get_roidb requires `proposals` (an array with at least 4 columns)")

    num_proposals = proposals.shape[0]
    # np.float was removed from NumPy; it was an alias of the builtin float.
    candidate_boxes = np.ascontiguousarray(proposals[:, :4], dtype=np.float64)

    for i in range(num_images):
        roi = dict()
        # NOTE(review): `i` is a position in image_index, while the method
        # name suggests it expects an entry of image_index - confirm.
        roi['image'] = imdb.image_path_from_index(i)
        roi['width'] = imdb.image_widths[i]
        roi['height'] = imdb.image_heights[i]
        roi['flipped'] = False

        # Ground-truth annotations of this image.
        gt_classes = imdb.gt_classes[i]
        gt_boxes = imdb.gt_boxes[i]
        gt_inds = np.where(gt_classes != 0)[0]

        # NOTE(review): these arrays are sized by the number of classes but
        # are indexed below with ground-truth box indices and with proposal
        # indices - confirm the intended index space.
        gt_overlaps = np.zeros((imdb.num_classes, 2), dtype=np.float32)
        max_overlaps = np.zeros(imdb.num_classes, dtype=np.float32)
        max_classes = np.zeros(imdb.num_classes, dtype=np.float32)

        # NOTE(review): bbox_overlaps is not imported in the visible part of
        # this file - confirm which module provides it.
        overlaps = bbox_overlaps(
            candidate_boxes,
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        # Best ground-truth column for every proposal row.
        argmaxes = np.argmax(overlaps, axis=1)
        # Proposal rows that reach the best overlap of any ground-truth column.
        gt_argmaxes = np.argmax(overlaps, axis=0)
        gt_maxes = overlaps[gt_argmaxes, np.arange(overlaps.shape[1])]
        gt_argmaxes = np.where(overlaps == gt_maxes)[0]

        # NOTE(review): the source lost its indentation; the last two
        # statements use `j`, so they are kept inside the loop, directly after
        # the one-line `if` - confirm the intended nesting.
        for j in gt_inds:
            gt_overlaps[j, 0] = 1.0
            gt_overlaps[j, 1] = 1.0
            max_overlaps[j] = 1.0
            max_classes[j] = gt_classes[j]
            if imdb.config['rpn_clobber_positives']:
                max_overlaps[np.where(overlaps[:, j] > 0)[0]] = 0.0
            max_overlaps[argmaxes] = 1.0
            max_classes[argmaxes] = gt_classes[j]

        keep_inds = np.append(
            np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0],
            gt_argmaxes)
        keep_inds = np.unique(keep_inds)
        # NOTE(review): `mask` is not consumed below; presumably it was meant
        # to select rows of the bbox_* arrays - confirm before wiring it in.
        mask = np.zeros(num_proposals, dtype=bool)
        mask[keep_inds] = True

        roi['gt_overlaps'] = gt_overlaps
        roi['max_classes'] = max_classes
        roi['max_overlaps'] = max_overlaps
        # Zero-initialised regression targets and weights, 8 columns per row.
        for key in ('bbox_targets', 'bbox_inside_weights', 'bbox_outside_weights'):
            roi[key] = np.zeros((num_proposals, 4 * 2), dtype=np.float32)
        roidb.append(roi)
    return roidb
以上代码是一个简化版本的RPN生成roidb的示例,其中包括了使用RPN提取proposals的函数和生成roidb的函数。在实际使用过程中,可能需要根据具体的需求进行修改。
生成的roidb是一个包含了每个图像信息的列表,每个列表元素是一个字典,包含了图像路径、图像宽高、ground truth目标框信息等。
使用示例:
# Load the dataset description.
# NOTE(review): load_imdb is not defined or imported in the visible code -
# confirm where it comes from.
imdb = load_imdb('path/to/dataset')
# Build the roidb for every image in the dataset.
# NOTE(review): get_roidb's body also reads a `proposals` array that this call
# does not supply - confirm how the proposals are provided.
roidb = get_roidb(imdb)
以上示例代码加载了一个数据集,并使用get_roidb函数生成了roidb。根据具体情况,可以对roidb进行进一步处理,比如进行数据增强、制作TFRecord文件等。
