谷歌Pixel 2人像模式代码曝光，你看懂了吗?

谷歌将其所使用的 AI 图像分层算法 DeepLab-v3+ 开源，让第三方相机 app 也可以利用这一神经网络。

开源代码：

import tensorflow as tf

from deeplab.core import feature_extractor

slim = tf.contrib.slim

# Scope names used by this module. The values are plain ASCII string
# literals; the typographic quotes in the pasted text are not valid Python
# string delimiters.
_LOGITS_SCOPE_NAME = 'logits'
# Key under which the merged logits are looked up in the per-output
# `scales_to_logits` dictionaries.
_MERGED_LOGITS_SCOPE = 'merged_logits'
_IMAGE_POOLING_SCOPE = 'image_pooling'
_ASPP_SCOPE = 'aspp'
_CONCAT_PROJECTION_SCOPE = 'concat_projection'
_DECODER_SCOPE = 'decoder'

def get_extra_layer_scopes():
    """Gets the scopes for extra layers.

    Returns:
      A list of scopes for extra layers: the logits, image pooling, ASPP,
      concat projection and decoder scope names, in that order. Note that
      `_MERGED_LOGITS_SCOPE` is not part of the list.
    """
    return [
        _LOGITS_SCOPE_NAME,
        _IMAGE_POOLING_SCOPE,
        _ASPP_SCOPE,
        _CONCAT_PROJECTION_SCOPE,
        _DECODER_SCOPE,
    ]

def predict_labels_multi_scale(images,
                               model_options,
                               eval_scales=(1.0,),
                               add_flipped_images=False):
    """Predicts segmentation labels.

    Runs `multi_scale_logits` once per entry of `eval_scales` (and once more
    on the flipped input when `add_flipped_images` is set), converts every
    result to softmax probabilities at the input resolution, averages them
    and takes the argmax over channels.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      eval_scales: The scales to resize images for evaluation.
      add_flipped_images: Add flipped images for evaluation or not.

    Returns:
      A dictionary with keys specifying the output_type (e.g., semantic
      prediction) and values storing Tensors representing predictions (argmax
      over channels). Each prediction has size [batch, height, width].
    """
    # One list of per-scale (and per-flip) probability tensors per output.
    outputs_to_predictions = {
        output: []
        for output in model_options.outputs_to_num_classes
    }

    for i, image_scale in enumerate(eval_scales):
        # The first scale (i == 0) leaves `reuse` unset; every later scale
        # passes reuse=True so the same variables are shared across scales.
        with tf.variable_scope(tf.get_variable_scope(),
                               reuse=True if i else None):
            outputs_to_scales_to_logits = multi_scale_logits(
                images,
                model_options=model_options,
                image_pyramid=[image_scale],
                is_training=False,
                fine_tune_batch_norm=False)

        if add_flipped_images:
            # Variables already exist from the pass above, so always reuse.
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                # Axis 2 is the width axis of [batch, height, width,
                # channels], i.e. the images are mirrored left-right.
                outputs_to_scales_to_logits_reversed = multi_scale_logits(
                    tf.reverse_v2(images, [2]),
                    model_options=model_options,
                    image_pyramid=[image_scale],
                    is_training=False,
                    fine_tune_batch_norm=False)

        # This loop must stay inside the per-scale loop: it consumes the
        # logits computed for the current scale.
        for output in sorted(outputs_to_scales_to_logits):
            scales_to_logits = outputs_to_scales_to_logits[output]
            # Bring the merged logits back to the input height and width.
            logits = tf.image.resize_bilinear(
                scales_to_logits[_MERGED_LOGITS_SCOPE],
                tf.shape(images)[1:3],
                align_corners=True)
            # Add a trailing axis (index 4) so predictions can be
            # concatenated and averaged along it below.
            outputs_to_predictions[output].append(
                tf.expand_dims(tf.nn.softmax(logits), 4))

            if add_flipped_images:
                scales_to_logits_reversed = (
                    outputs_to_scales_to_logits_reversed[output])
                # Flip the logits back along the width axis before resizing
                # so they line up with the unflipped predictions.
                logits_reversed = tf.image.resize_bilinear(
                    tf.reverse_v2(
                        scales_to_logits_reversed[_MERGED_LOGITS_SCOPE], [2]),
                    tf.shape(images)[1:3],
                    align_corners=True)
                outputs_to_predictions[output].append(
                    tf.expand_dims(tf.nn.softmax(logits_reversed), 4))

    for output in sorted(outputs_to_predictions):
        predictions = outputs_to_predictions[output]
        # Compute average prediction across different scales and flipped
        # images.
        predictions = tf.reduce_mean(tf.concat(predictions, 4), axis=4)
        # Axis 3 is the channel axis; argmax over it yields the labels.
        outputs_to_predictions[output] = tf.argmax(predictions, 3)

    return outputs_to_predictions

def predict_labels(images, model_options, image_pyramid=None):
    """Predicts segmentation labels.

    Calls `multi_scale_logits` once in inference mode, resizes the merged
    logits of every output to the input height and width, and takes the
    argmax over channels.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      image_pyramid: Input image scales for multi-scale feature extraction.

    Returns:
      A dictionary with keys specifying the output_type (e.g., semantic
      prediction) and values storing Tensors representing predictions (argmax
      over channels). Each prediction has size [batch, height, width].
    """
    outputs_to_scales_to_logits = multi_scale_logits(
        images,
        model_options=model_options,
        image_pyramid=image_pyramid,
        is_training=False,
        fine_tune_batch_norm=False)

    predictions = {}
    for output in sorted(outputs_to_scales_to_logits):
        scales_to_logits = outputs_to_scales_to_logits[output]
        # Bring the merged logits back to the input height and width.
        logits = tf.image.resize_bilinear(
            scales_to_logits[_MERGED_LOGITS_SCOPE],
            tf.shape(images)[1:3],
            align_corners=True)
        # Axis 3 is the channel axis; argmax over it yields the labels.
        predictions[output] = tf.argmax(logits, 3)

    return predictions

def scale_dimension(dim, scale):
    """Scales the input dimension.

    The result is `(dim - 1) * scale + 1`, truncated to an integer.

    Args:
      dim: Input dimension (a scalar or a scalar Tensor).
      scale: The amount of scaling applied to the input.

    Returns:
      Scaled dimension: an int32 Tensor when `dim` is a `tf.Tensor`,
      otherwise a Python int.
    """
    # NOTE(review): the pasted text had lost the `*` between the parenthesised
    # term and `scale`, and used an en-dash for the minus sign; both are
    # restored here. Multiplication is the operator implied by "scales" in
    # the docstring -- confirm against the upstream source.
    if isinstance(dim, tf.Tensor):
        return tf.cast((tf.to_float(dim) - 1.0) * scale + 1.0, dtype=tf.int32)
    else:
        return int((float(dim) - 1.0) * scale + 1.0)

def multi_scale_logits(images,

model_options,

image_pyramid,

weight_decay=0.0001,

is_training=False,

fine_tune_batch_norm=False):

“””Gets the logits for multi-scale inputs.

The returned logits are all downsampled (due to max-pooling layers)

for both training and evaluation.

更多查看：
https://github.com/tensorflow/models/tree/master/research/deeplab

声明：小猿资源站是一个资源分享和技术交流平台，本站所发布的一切破解补丁、注册机和注册信息及软件的解密分析文章仅限用于学习和研究目的；不得将上述内容用于商业或者非法用途，否则，一切后果请用户自负。本站信息来自网络，版权争议与本站无关。您必须在下载后的24个小时之内，从您的电脑中彻底删除上述内容。如果您喜欢该程序，请支持正版软件，购买注册，得到更好的正版服务。如若本站内容侵犯了原著者的合法权益，可联系我们进行处理。

相关文章