# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Metrics for 3D detection problems."""
from lingvo import compat as tf
from lingvo.core import metrics
from lingvo.core import plot
from lingvo.core import py_utils
from lingvo.tasks.car import summary
from lingvo.tasks.car import transform_util
import matplotlib.colors as matplotlib_colors
import matplotlib.patches as matplotlib_patches
import matplotlib.patheffects as path_effects
import numpy as np
import PIL.Image as Image
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from tensorboard.plugins.mesh import summary as mesh_summary


class TopDownVisualizationMetric(metrics.BaseMetric):
  """Top-down detection visualization, expecting 3D laser points and 2D bboxes.

  Updates to this metric are expected to be `.NestedMap`s containing:

  - visualization_labels: [N, B1] int tensor containing visualization labels.
  - predicted_bboxes: [N, B1, 5] float tensor containing predicted 2D bboxes,
    each with (x, y, dx, dy, phi).
  - visualization_weights: [N, B1] float tensor containing weights for each
    prediction. Predictions with 0 weight will not be drawn.
  - points_xyz: [N, P, 3] float tensor containing (x, y, z) coordinates.
  - points_padding: [N, P] tensor containing 1 if the point is a padded point.
  - gt_bboxes_2d: [N, B2, 5] float tensor containing ground-truth 2D bboxes.
  - gt_bboxes_2d_weights: [N, B2] float tensor containing weights for each
    ground-truth box. Boxes with 0 weight will not be drawn; the ground-truth
    mask can be used here.
  - labels: [N, B2] int tensor containing ground-truth labels.
  - difficulties: [N, B2] int tensor containing the difficulty level of each
    ground-truth box.

  Default parameters visualize the area around the car, with the car centered
  in the image, over a 32m x 48m range.

  Ground-truth boxes will be drawn with color=cyan (see DrawBBoxesOnImages for
  details). Predicted boxes will be drawn with a color from the PIL color list,
  with a different color per class.
  """

  def __init__(self,
               top_down_transform,
               class_id_to_name=None,
               image_height=1536,
               image_width=1024,
               figsize=None,
               ground_removal_threshold=-1.35,
               sampler_num_samples=8):
    """Initialize TopDownVisualizationMetric.

    Args:
      top_down_transform: transform_util.Transform object that specifies how to
        transform a coordinate in the world frame to the top-down projection.
        See the documentation of transform_util.MakeCarToImageTransform for
        more details on configuration.
      class_id_to_name: Dictionary mapping from class id to name.
      image_height: int image height.
      image_width: int image width.
      figsize: (w, h) float tuple. This is the size of the rendered figure in
        inches. A dpi of 100 is used by plot.Image; note that the axes and
        title will take up space in the final rendering. If None, this defaults
        to (image_width / 100 * 1.5, image_height / 100 * 1.5).
      ground_removal_threshold: Floating point z threshold used to color ground
        points differently. Defaults to -1.35, which happens to work well for
        KITTI.
      sampler_num_samples: Number of batches to keep for visualization.
    """
    self._class_id_to_name = class_id_to_name or {}
    self._image_width = image_width
    self._image_height = image_height
    figsize = figsize or (image_width / 100. * 1.5, image_height / 100. * 1.5)
    self._figsize = figsize
    self._ground_removal_threshold = ground_removal_threshold
    self._sampler = py_utils.UniformSampler(num_samples=sampler_num_samples)
    self._top_down_transform = top_down_transform
    self._summary = None
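
  # A minimal construction sketch, assuming a KITTI-style top-down view; the
  # MakeCarToImageTransform arguments and the class map below are illustrative,
  # not values prescribed by this class:
  #
  #   transform = transform_util.MakeCarToImageTransform(
  #       pixels_per_meter=32.,
  #       image_ref_x=512.,
  #       image_ref_y=1408.,
  #       flip_axes=True)
  #   metric = TopDownVisualizationMetric(
  #       transform, class_id_to_name={1: 'Pedestrian', 2: 'Car'})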

  def Update(self, decoded_outputs):
    """Add top-down visualization data to be summarized.

    Args:
      decoded_outputs: A `.NestedMap` containing the fields
        visualization_labels, predicted_bboxes, visualization_weights,
        points_xyz, points_padding, gt_bboxes_2d, gt_bboxes_2d_weights, labels,
        difficulties, and source_ids.
    """
    self._sampler.Add(decoded_outputs)
    # Invalidate cache.
    self._summary = None

  def _XYWHToExtrema(self, bboxes):
    """Convert from (x, y, dx, dy) to extrema (ymin, xmin, ymax, xmax)."""
    matrix = np.array([
        # x    y    dx   dy
        [0.0, 1.0, 0.0, -.5],  # ymin
        [1.0, 0.0, -.5, 0.0],  # xmin
        [0.0, 1.0, 0.0, 0.5],  # ymax
        [1.0, 0.0, 0.5, 0.0],  # xmax
    ]).T
    bboxes = bboxes.copy()
    bboxes[..., :4] = np.matmul(bboxes[..., :4], matrix)
    return bboxes
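
  # Worked example of the matrix above (illustrative numbers): a box with
  # (x, y, dx, dy) = (1, 2, 4, 6) maps to
  #   ymin = y - dy / 2 = -1,  xmin = x - dx / 2 = -1,
  #   ymax = y + dy / 2 =  5,  xmax = x + dx / 2 =  3,
  # and any trailing entries (e.g. the rotation phi) pass through unchanged.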

  def _DrawLasers(self, images, points_xyz, points_padding, transform):
    """Draw laser points."""
    for batch_idx in range(images.shape[0]):
      for points_idx in range(points_xyz.shape[1]):
        if points_padding[batch_idx, points_idx] == 0:
          x, y, z = points_xyz[batch_idx, points_idx, :3]
          tx, ty, _ = transform_util.TransformPoint(transform, x, y, z)
          if tx < 0 or ty < 0 or tx >= images.shape[2] or ty >= images.shape[1]:
            continue
          # Dim ground points so they do not dominate the visualization.
          if z < self._ground_removal_threshold:
            # Brown out the color for ground points.
            color = (64, 48, 48)
          else:
            color = (255, 255, 255)
          images[batch_idx, int(ty), int(tx), :] = color

  def Summary(self, name):
    self._EvaluateIfNecessary(name)
    return self._summary
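
  # Consumption sketch (hedged; the writer below uses the TF1-style
  # tf.compat.v1.summary.FileWriter rather than anything defined in this
  # module, and logdir/global_step are placeholders):
  #
  #   metric.Update(decoded_outputs)
  #   writer = tf.compat.v1.summary.FileWriter(logdir)
  #   writer.add_summary(metric.Summary('top_down_visualization'), global_step)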

  def _EvaluateIfNecessary(self, name):
    """Create a top down image summary, if not already created."""
    if self._summary is not None:
      return

    tf.logging.info('Generating top down summary.')
    ret = tf.Summary()

    transform = self._top_down_transform

    for batch_idx, batch_sample in enumerate(self._sampler.samples):
      batch_size = batch_sample.labels.shape[0]
      visualization_labels = batch_sample.visualization_labels
      predicted_bboxes = batch_sample.predicted_bboxes
      visualization_weights = batch_sample.visualization_weights
      points_xyz = batch_sample.points_xyz
      points_padding = batch_sample.points_padding
      gt_bboxes_2d = batch_sample.gt_bboxes_2d
      gt_bboxes_2d_weights = batch_sample.gt_bboxes_2d_weights
      labels = batch_sample.labels
      difficulties = batch_sample.difficulties
      source_ids = batch_sample.source_ids

      # Create base images for the entire batch that we will update.
      images = np.zeros([batch_size, self._image_height, self._image_width, 3],
                        dtype=np.uint8)

      # Draw lasers first, so that bboxes can be on top.
      self._DrawLasers(images, points_xyz, points_padding, transform)

      # Draw ground-truth bboxes.
      gt_bboxes_2d = np.where(
          np.expand_dims(gt_bboxes_2d_weights > 0, -1), gt_bboxes_2d,
          np.zeros_like(gt_bboxes_2d))
      transformed_gt_bboxes_2d = summary.TransformBBoxesToTopDown(
          gt_bboxes_2d, transform)
      summary.DrawBBoxesOnImages(
          images,
          transformed_gt_bboxes_2d,
          gt_bboxes_2d_weights,
          labels,
          self._class_id_to_name,
          groundtruth=True)

      # Draw predicted bboxes.
      predicted_bboxes = np.where(
          np.expand_dims(visualization_weights > 0, -1), predicted_bboxes,
          np.zeros_like(predicted_bboxes))
      transformed_predicted_bboxes = summary.TransformBBoxesToTopDown(
          predicted_bboxes, transform)
      summary.DrawBBoxesOnImages(
          images,
          transformed_predicted_bboxes,
          visualization_weights,
          visualization_labels,
          self._class_id_to_name,
          groundtruth=False)

      # Draw the difficulties on the image.
      self.DrawDifficulty(images, transformed_gt_bboxes_2d,
                          gt_bboxes_2d_weights, difficulties)

      for idx in range(batch_size):
        source_id = source_ids[idx]

        def AnnotateImage(fig, axes, source_id=source_id):
          """Add source_id to the image."""
          del fig
          # Draw in the top middle of the image.
          text = axes.text(
              500,
              15,
              source_id,
              fontsize=16,
              color='blue',
              fontweight='bold',
              horizontalalignment='center')
          text.set_path_effects([
              path_effects.Stroke(linewidth=3, foreground='lightblue'),
              path_effects.Normal()
          ])

        image_summary = plot.Image(
            name='{}/{}/{}'.format(name, batch_idx, idx),
            aspect='equal',
            figsize=self._figsize,
            image=images[idx, ...],
            setter=AnnotateImage)
        ret.value.extend(image_summary.value)

    tf.logging.info('Done generating top down summary.')
    self._summary = ret
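
  # Note: DrawDifficulty below expects ground-truth boxes that have already
  # been transformed into top-down pixel coordinates (as done above via
  # summary.TransformBBoxesToTopDown), since it writes text directly at the
  # box centers in image space.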

  def DrawDifficulty(self, images, gt_bboxes, gt_box_weights, difficulties):
    """Draw the difficulty values on each ground truth box."""
    batch_size = np.shape(images)[0]

    try:
      font = ImageFont.truetype('arial.ttf', size=20)
    except IOError:
      font = ImageFont.load_default()

    for batch_id in range(batch_size):
      image = images[batch_id, :, :, :]
      original_image = image
      image = Image.fromarray(np.uint8(original_image)).convert('RGB')
      draw = ImageDraw.Draw(image)
      difficulty_vector = difficulties[batch_id]
      box_data = gt_bboxes[batch_id]
      for box_id in range(box_data.shape[0]):
        box_weight = gt_box_weights[batch_id, box_id]
        if box_weight == 0:
          continue
        center_x = box_data[box_id, 0]
        center_y = box_data[box_id, 1]
        difficulty_value = str(difficulty_vector[box_id])
        # Draw a rectangle background slightly larger than the text.
        text_width, text_height = font.getsize(difficulty_value)
        draw.rectangle(
            [(center_x - text_width / 1.8, center_y - text_height / 1.8),
             (center_x + text_width / 1.8, center_y + text_height / 1.8)],
            fill='darkcyan')
        # Center the text in the rectangle.
        draw.text((center_x - text_width / 2, center_y - text_height / 2),
                  difficulty_value,
                  fill='lightcyan',
                  font=font)
      np.copyto(original_image, np.array(image))


class WorldViewer(metrics.BaseMetric):
  """World viewer for 3D point cloud scenes."""

  # Maximum hue used for point cloud colorization by distance.
  _MAX_HUE = 0.65

  # Distance from the car beyond which all points are considered equally far.
  _MAX_DISTANCE_METERS = 40.

  def __init__(self, sampler_num_samples=8):
    """Init."""
    self._sampler = py_utils.UniformSampler(num_samples=sampler_num_samples)
    self._summary = None

  def Update(self, decoded_outputs):
    """Add point cloud mesh data to be summarized.

    Args:
      decoded_outputs: A `.NestedMap` containing the fields
        visualization_labels, predicted_bboxes, visualization_weights,
        points_xyz, points_padding, gt_bboxes_2d, gt_bboxes_2d_weights, and
        labels. Only points_xyz and points_padding are used by this metric.
    """
    self._sampler.Add(decoded_outputs)
    # Invalidate cache.
    self._summary = None

  def Summary(self, name):
    self._EvaluateIfNecessary(name)
    return self._summary

  def _EvaluateIfNecessary(self, name):
    """Create a mesh summary, if not already created."""
    if self._summary is not None:
      return

    summ = None
    tf.logging.info('Generating mesh summary.')
    for i, batch_sample in enumerate(self._sampler.samples):
      points_xyz = batch_sample.points_xyz[i:i + 1]
      points_padding = batch_sample.points_padding[i:i + 1]
      points_mask = (1. - points_padding).astype(bool)

      # Apply mask and expand to include a batch dimension.
      points_xyz = points_xyz[points_mask][np.newaxis, ...]

      # Compute colors based off distance from the car.
      distance = np.sqrt(points_xyz[0, :, 0]**2 + points_xyz[0, :, 1]**2 +
                         points_xyz[0, :, 2]**2)

      # Normalize by some max distance beyond which we don't distinguish
      # distance.
      max_distance = np.ones_like(distance) * WorldViewer._MAX_DISTANCE_METERS
      distance = np.minimum(max_distance, distance)
      scale = (max_distance - distance) / max_distance

      # Convert to RGB.
      hue = np.minimum(WorldViewer._MAX_HUE, scale)[..., np.newaxis]
      # Invert hue so red is closer.
      hue = WorldViewer._MAX_HUE - hue
      s, v = np.ones_like(hue), np.ones_like(hue)
      hsv = np.hstack([hue, s, v])
      rgb = matplotlib_colors.hsv_to_rgb(hsv)
      colors = np.minimum(255., rgb * 255.).astype(np.uint8)
      colors = colors[np.newaxis, ...]
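      # Color sanity check (illustrative numbers): a point at the origin has
      # distance 0, hence scale 1 and hue _MAX_HUE - 0.65 = 0.0 (red), while a
      # point at or beyond 40m has scale 0 and hue 0.65 (blue); points in
      # between fade smoothly from red to blue with distance.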
      summ = mesh_summary.pb(
          '{}/point_cloud/{}'.format(name, i),
          vertices=points_xyz,
          colors=colors,
          faces=None)

      # At the moment, only one scene summary is supported; writing
      # more makes the TensorBoard mesh visualizer hang.
      break

    if summ:
      self._summary = summ
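
    # The resulting proto is a TensorBoard mesh-plugin summary; when written
    # with a standard summary writer it renders under TensorBoard's Mesh tab.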


class CameraVisualization(metrics.BaseMetric):
  """Camera detection visualization.

  Visualizes a camera image and predicted bounding boxes on top of the image.

  Updates to this metric are expected to be `.NestedMap`s containing:

    camera_images: [N, H, W, 3] float tensor containing camera image data.
    bbox_corners: [N, B1, 8, 2] float tensor containing bounding box corners.
      For each batch element (N) and each box (B1), there are 8 corners, each
      with an x and y value.
    bbox_scores: [N, B1] float tensor containing predicted box scores.
  """

  def __init__(self,
               figsize=(15, 15),
               bbox_score_threshold=0.01,
               sampler_num_samples=8,
               draw_3d_boxes=True):
    """Initialize CameraVisualization.

    Args:
      figsize: (w, h) float tuple. This is the size of the rendered figure in
        inches. A dpi of 100 is used by plot.Image; note that the axes and
        title will take up space in the final rendering.
      bbox_score_threshold: Score threshold above which bboxes will be drawn on
        the image.
      sampler_num_samples: Number of batches to keep for visualization.
      draw_3d_boxes: Whether to draw 3d (True) or 2d (False) bounding boxes. 3d
        bounding boxes depict the 8 corners of each bounding box, whereas 2d
        bounding boxes depict the extrema of the box's x and y dimensions on
        the image plane.
    """
    self._figsize = figsize
    self._bbox_score_threshold = bbox_score_threshold
    self._sampler = py_utils.UniformSampler(num_samples=sampler_num_samples)
    self._draw_3d_boxes = draw_3d_boxes
    self._summary = None
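
  # Illustrative update sketch (hedged; shapes follow the class docstring and
  # the image size and pixel values below are made up): one 375x1242 camera
  # image with two predicted boxes could be fed as
  #
  #   metric = CameraVisualization(draw_3d_boxes=False)
  #   metric.Update(py_utils.NestedMap(
  #       camera_images=np.zeros([1, 375, 1242, 3]),
  #       bbox_corners=np.random.uniform(0., 300., size=[1, 2, 8, 2]),
  #       bbox_scores=np.array([[0.9, 0.4]])))
  #   summary_proto = metric.Summary('camera_visualization')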

  def Update(self, decoded_outputs):
    self._sampler.Add(decoded_outputs)
    # Invalidate cache.
    self._summary = None

  def Summary(self, name):
    self._EvaluateIfNecessary(name)
    return self._summary

  def _EvaluateIfNecessary(self, name):
    """Create a camera image summary if not already created."""
    if self._summary is not None:
      return

    ret = tf.Summary()

    for sample_idx, sample in enumerate(self._sampler.samples):
      batch_size = sample.camera_images.shape[0]

      for batch_idx in range(batch_size):
        image = sample.camera_images[batch_idx]

        # [num bboxes, 8, 2].
        bbox_corners = sample.bbox_corners[batch_idx]

        # [num_bboxes]
        bbox_scores = sample.bbox_scores[batch_idx]

        def Draw3DBoxes(fig,
                        axes,
                        bbox_corners=bbox_corners,
                        bbox_scores=bbox_scores):
          """Draw 3d bounding boxes."""
          del fig
          for bbox_id in range(bbox_corners.shape[0]):
            # Skip visualizing low-scoring boxes.
            bbox_score = bbox_scores[bbox_id]
            if bbox_score < self._bbox_score_threshold:
              continue
            bbox_data = bbox_corners[bbox_id]

            # Draw the score of each box.
            #
            # Turn score into an integer for better display.
            center_x = np.mean(bbox_data[:, 0])
            center_y = np.mean(bbox_data[:, 1])
            bbox_score = int(bbox_score * 100)
            text = axes.text(
                center_x,
                center_y,
                bbox_score,
                fontsize=12,
                color='red',
                fontweight='bold')
            text.set_bbox(dict(facecolor='yellow', alpha=0.4))

            # The BBoxToCorners function produces the points in a deterministic
            # order, which we use to draw the faces of the polygon.
            #
            # The first 4 points are the "top" of the bounding box.
            # The second 4 points are the "bottom" of the bounding box.
            #
            # We then draw the last 4 connecting points by choosing two of the
            # connecting faces in the right order.
            face_points = []
            face_points += [[
                bbox_data[0, :], bbox_data[1, :], bbox_data[2, :],
                bbox_data[3, :]
            ]]
            face_points += [[
                bbox_data[4, :], bbox_data[5, :], bbox_data[6, :],
                bbox_data[7, :]
            ]]
            face_points += [[
                bbox_data[1, :], bbox_data[2, :], bbox_data[6, :],
                bbox_data[5, :]
            ]]
            face_points += [[
                bbox_data[0, :], bbox_data[3, :], bbox_data[7, :],
                bbox_data[4, :]
            ]]
            for face in face_points:
              # Each face is a list of 4 (x, y) points.
              face_xy = np.array(face)
              axes.add_patch(
                  matplotlib_patches.Polygon(
                      face_xy, closed=True, edgecolor='red', facecolor='none'))

        def Draw2DBoxes(fig,
                        axes,
                        bbox_corners=bbox_corners,
                        bbox_scores=bbox_scores):
          """Draw 2d boxes on the figure."""
          del fig
          # Extract the 2D extrema of each bbox and draw the resulting
          # axis-aligned rectangle.
          for bbox_id in range(bbox_corners.shape[0]):
            # Skip visualizing low-scoring boxes.
            bbox_score = bbox_scores[bbox_id]
            if bbox_score < self._bbox_score_threshold:
              continue
            bbox_data = bbox_corners[bbox_id]
            ymin = np.min(bbox_data[:, 1])
            xmin = np.min(bbox_data[:, 0])
            ymax = np.max(bbox_data[:, 1])
            xmax = np.max(bbox_data[:, 0])
            height = ymax - ymin
            width = xmax - xmin
            # Turn score into an integer for better display.
            bbox_score = int(bbox_score * 100)
            text = axes.text(
                xmin,
                ymin,
                bbox_score,
                fontsize=12,
                color='red',
                fontweight='bold')
            text.set_bbox(dict(facecolor='yellow', alpha=0.4))
            axes.add_patch(
                matplotlib_patches.Rectangle((xmin, ymin),
                                             width,
                                             height,
                                             edgecolor='red',
                                             facecolor='none'))

        # For each image, draw the boxes on that image.
        draw_fn = Draw3DBoxes if self._draw_3d_boxes else Draw2DBoxes
        image_summary = plot.Image(
            name='{}/{}/{}'.format(name, sample_idx, batch_idx),
            aspect='equal',
            figsize=self._figsize,
            image=image,
            setter=draw_fn)
        ret.value.extend(image_summary.value)

    self._summary = ret