Image Classification
resnet_v1d_50
##-*- encoding:utf-8 -*-
# Author: wenmeng.zwm@alibaba-inc.com
# Date: 2018-09-06
# classification_resnet50.config: configuration for the resnet50 classification model
#
model_config {
model_class: 'Classification'
classification {
backbone {
class_name: 'resnet_v1d_50'
weight_decay: 0.0001
}
num_classes: 1000
loss {
weighted_softmax {
}
}
label_id_offset: 1
}
}
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0
schedule {
step: 5000
learning_rate: 0.1
}
schedule {
step: 200000
learning_rate: 0.01
}
schedule {
step: 400000
learning_rate: 0.001
}
schedule {
step: 600000
learning_rate: 0.0001
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
}
#gradient_clipping_by_norm : 10.0
#distribute training setting
sync_replicas: true
#using 8 gpu
replicas_to_aggregate:8
num_worker_replicas: 8
num_steps: 800000
model_dir: 'experiments/imagenet_resnet50_dis/train'
}
train_data: {
input_path: "data/imagenet_tfrecord/train-*"
batch_size: 32
num_readers: 4
shuffle: true
read_block_length: 32
classification_decoder_config{
}
data_augmentation_options {
random_distort_color {
color_ordering: 0
fast_mode: true
}
}
data_augmentation_options {
vgg_preprocessing {
is_training: true
}
}
}
eval_config: {
num_examples: 50000
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
# max_evals: 10
metrics_set : 'classification_metric'
}
eval_data : {
input_path: "data/imagenet_tfrecord/validation-*"
batch_size: 32
shuffle: false
num_readers: 1
classification_decoder_config{
label_map_path: 'data/imagenet_tfrecord/imagenet_labelmap.pbtxt'
}
data_augmentation_options {
vgg_preprocessing {
is_training: false
}
}
}
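The schedule above warms the learning rate up from 0.0 to 0.1 over the first 5000 steps (warmup: true), then drops it to 0.01, 0.001 and 0.0001 at steps 200000, 400000 and 600000. A minimal Python sketch of the usual semantics of manual_step_learning_rate with warmup, assuming linear warmup to the first scheduled rate; the library's exact behaviour may differ in details:

def manual_step_lr(step, initial_lr=0.0,
                   schedule=((5000, 0.1), (200000, 0.01), (400000, 0.001), (600000, 0.0001)),
                   warmup=True):
    # piecewise-constant learning rate with optional linear warmup (illustrative sketch)
    first_step, first_lr = schedule[0]
    if warmup and step < first_step:
        # linearly interpolate from initial_lr to the first scheduled rate
        return initial_lr + (first_lr - initial_lr) * step / first_step
    lr = initial_lr
    for boundary, rate in schedule:
        if step >= boundary:
            lr = rate
    return lr

# manual_step_lr(2500) -> 0.05 (halfway through warmup); manual_step_lr(300000) -> 0.01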
mobilenet_v3
##-*- encoding:utf-8 -*-
# Author: wenmeng.zwm@alibaba-inc.com
# Date: 2018-09-06
# configuration for the mobilenet_v3 classification model
#
model_config {
model_class: 'Classification'
classification {
backbone {
class_name: 'mobilenet_v3'
weight_decay: 0.00004
depth_multiplier: 1.0
}
num_classes: 1001
loss {
weighted_softmax {
}
}
label_id_offset: 0
}
}
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.1
schedule {
step: 200000
learning_rate: 0.01
}
schedule {
step: 400000
learning_rate: 0.001
}
schedule {
step: 550000
learning_rate: 0.0001
}
}
}
momentum_optimizer_value: 0.9
}
}
#gradient_clipping_by_norm : 10.0
num_steps: 800000
model_dir: 'experiments/imagenet/output/imagenet_mobilenet_v3'
}
train_data: {
input_path: "data/imagenet/tfrecords/train-*"
batch_size: 256
num_readers: 4
shuffle: true
read_block_length : 256
classification_decoder_config{
}
data_augmentation_options {
inception_preprocessing {
is_training: true
}
}
}
eval_config: {
num_examples: 50000
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
# max_evals: 10
num_visualizations: 0
metrics_set : 'classification_metric'
}
eval_data : {
input_path: "data/imagenet/tfrecords/validation-*"
batch_size: 100
shuffle: false
num_readers: 1
classification_decoder_config{
label_map_path: 'data/imagenet/tfrecords/imagenet_labelmap.pbtxt'
}
data_augmentation_options {
inception_preprocessing {
is_training: false
}
}
}
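mobilenet_v3 exposes depth_multiplier, which scales the channel count of every layer: 1.0 keeps the published width, while smaller values give slimmer variants. A rough sketch of how a width multiplier is commonly applied; the rounding convention (multiples of 8) is the usual MobileNet one and may differ slightly from the library's implementation:

def scale_channels(base_channels, depth_multiplier=1.0, divisor=8, min_depth=8):
    # round base_channels * depth_multiplier to a multiple of `divisor`, never below min_depth
    scaled = int(base_channels * depth_multiplier + divisor / 2) // divisor * divisor
    return max(min_depth, scaled)

# scale_channels(32, 1.0) -> 32; scale_channels(32, 0.75) -> 24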
efficientnet_b0
##-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2020-02-10
model_config {
model_class: 'Classification'
classification {
backbone {
class_name: 'efficientnet-b0'
weight_decay: 0.00001
connect_survival_prob: 0.8
}
num_classes: 1000
loss {
weighted_softmax {
}
}
label_id_offset: 1
}
}
train_config: {
optimizer {
rms_prop_optimizer: {
learning_rate: {
# decay by 0.97 every 2.4 epochs
exponential_decay_learning_rate {
initial_learning_rate: 0.032
decay_steps: 6000
decay_factor: 0.97
}
}
}
use_moving_average: false
}
num_steps: 900000 # total 150 epochs
save_checkpoints_steps: 5000
model_dir: 'experiments/imagenet/output/imagenet_efficientnet_b0/'
sync_replicas: true
replicas_to_aggregate:8
num_worker_replicas: 8
}
train_data: {
input_path: "data/imagenet/tfrecords/train-*"
batch_size: 64
num_readers: 8
shuffle: true
read_block_length : 32
classification_decoder_config{
}
data_augmentation_options {
efficientnet_preprocessing {
model_name: 'efficientnet-b0' # use default image size for the model
is_training: true
}
}
}
eval_config: {
num_examples: 50000
metrics_set : 'classification_metric'
}
eval_data : {
input_path: "data/imagenet/tfrecords/validation-*"
batch_size: 100
read_block_length : 100
shuffle: false
num_readers: 1
classification_decoder_config{
label_map_path: 'data/imagenet/imagenet_labelmap.pbtxt'
}
data_augmentation_options {
efficientnet_preprocessing {
model_name: 'efficientnet-b0' # use default image size for the model
is_training: false
}
}
}
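The decay_steps value follows from the comment "decay by 0.97 every 2.4 epochs": with batch_size 64 on 8 synchronized replicas the effective batch is 512, so one ImageNet epoch (~1.28M images) is roughly 2500 steps and 2.4 epochs is about 6000 steps. A sketch of the exponential schedule itself; whether the decay is applied continuously or as a staircase is an implementation detail, so both are shown:

def exponential_decay_lr(step, initial_lr=0.032, decay_steps=6000, decay_factor=0.97, staircase=True):
    # lr = initial_lr * decay_factor ** (step / decay_steps); staircase floors the exponent
    exponent = step // decay_steps if staircase else step / decay_steps
    return initial_lr * decay_factor ** exponent

# 1_281_167 images / (64 * 8) per step is about 2503 steps per epoch, so 6000 steps ~ 2.4 epochs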
darknet53
##-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2020-02-10
model_config {
model_class: 'Classification'
classification {
backbone {
class_name: 'darknet53'
weight_decay: 0.00001
}
num_classes: 1000
loss {
weighted_softmax {
}
}
label_id_offset: 1
}
}
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
# decay by 0.97 every 2.4 epochs
exponential_decay_learning_rate {
initial_learning_rate: 0.032
decay_steps: 6000
decay_factor: 0.97
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
num_steps: 900000 # total 150 epochs
save_checkpoints_steps: 5000
model_dir: 'experiments/imagenet/output/imagenet_darknet53/'
}
train_data: {
input_path: "data/imagenet/tfrecords/train-*"
batch_size: 64
num_readers: 8
shuffle: true
read_block_length : 32
classification_decoder_config{
}
data_augmentation_options {
classification_random_crop {
}
}
data_augmentation_options {
resize_image {
new_height: 256
new_width: 256
}
}
data_augmentation_options {
normalize_image {
original_minval: 0.0
original_maxval: 255.0
target_minval: 0.0
target_maxval: 1.0
}
}
}
eval_config: {
num_examples: 50000
metrics_set : 'classification_metric'
}
eval_data : {
input_path: "data/imagenet/tfrecords/validation-*"
batch_size: 100
read_block_length : 100
shuffle: false
num_readers: 1
classification_decoder_config{
label_map_path: 'data/imagenet/imagenet_labelmap.pbtxt'
}
data_augmentation_options {
classification_central_crop {
}
}
data_augmentation_options {
resize_image {
new_height: 224 #256
new_width: 224 #256
}
}
data_augmentation_options {
normalize_image {
original_minval: 0.0
original_maxval: 255.0
target_minval: 0.0
target_maxval: 1.0
}
}
}
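normalize_image is a plain linear rescale of pixel values from [original_minval, original_maxval] to [target_minval, target_maxval]; here it maps raw [0, 255] pixels to [0, 1]. A sketch of that mapping:

def normalize_image(x, original_minval=0.0, original_maxval=255.0, target_minval=0.0, target_maxval=1.0):
    # linear rescale: 0 -> 0.0 and 255 -> 1.0 with the values used in this config
    scale = (target_maxval - target_minval) / (original_maxval - original_minval)
    return (x - original_minval) * scale + target_minval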
Object Detection
faster_rcnn_r50
#-*- encoding:utf-8 -*-
# Author: mengli.cml@alibaba-inc.com
# Date: 2018-06-22
# simple_rpn.config: configuration for a simple RPN (Faster R-CNN) model
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.00001
schedule {
step: 100
learning_rate: 0.001
}
schedule {
step: 90000
learning_rate: .0001
}
schedule {
step: 120000
learning_rate: .00001
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint: "pretrained_models/resnet_v1d_50/model.ckpt"
num_steps: 150000
model_dir: "pascal_resnet50_frcnn_model"
}
train_data: {
# [0-7] evenly split into 8 parts
input_path: "data/voc0712_tfrecord/voc0712_part_*.tfrecord"
batch_size: 2
num_readers: 4
read_block_length: 1
shuffle: true
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_resize_to_range {
min_sizes: 600
max_sizes: 1024
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: false
}
eval_config: {
num_examples: 4952
max_evals: 1000
num_visualizations: 100
#metrics_set: 'coco_detection_metrics'
metrics_set: 'pascal_voc_detection_metrics'
metrics_set: 'pascal_voc07_detection_metrics'
}
eval_data: {
input_path: "data/voc0712_tfrecord/VOC2007_test.tfrecord"
batch_size: 1
shuffle: false
num_readers: 1
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_resize_to_range {
min_sizes: 600
max_sizes: 1024
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: true
}
export_config {
batch_size: 1
}
model_config: {
model_class: 'FasterRcnn'
faster_rcnn {
backbone {
class_name: 'resnet_v1d_50'
batchnorm_trainable: false
weight_decay: 0.0001
output_stride: 16
}
rpn_head {
input_layer: 'resnet_v1d_50/block3'
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
min_depth: 512
max_depth: 512
num_layers_before_predictor: 1
kernel_size: 3
}
}
first_stage_minibatch_size: 256
first_stage_positive_balance_fraction: 0.5
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
rpn_min_size: 16
first_stage_anchor_generator {
# the default base anchor size is 256
grid_anchor_generator {
scales: [0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
}
region_feature_extractor {
resnet_block {
class_name: 'resnet_v1d_50' # the name of the backbone
block_name: 'block4' # the last residual block of resnet_v1d_50
stride: 1
weight_decay: 0.0001
}
}
rcnn_head {
input_layer: 'resnet_v1d_50/block3'
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 20
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
xavier_initializer {
}
}
}
agnostic: true
}
}
nms_config {
score_threshold: 0.0
iou_threshold: 0.3
max_detections_per_class: 400
max_total_detections: 400
}
second_stage_batch_size: 128
second_stage_balance_fraction: 0.25
}
}
}
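The comment in first_stage_anchor_generator notes that grid_anchor_generator uses a default base anchor size of 256, so the RPN places 3 scales x 3 aspect ratios = 9 anchors at every 16-pixel stride. A small sketch of the resulting anchor shapes, assuming the common convention that aspect_ratio means width / height (other implementations may transpose this):

import itertools, math

def grid_anchor_shapes(base_size=256, scales=(0.5, 1.0, 2.0), aspect_ratios=(0.5, 1.0, 2.0)):
    # returns (height, width) for each of the 9 anchors tiled every 16 pixels
    shapes = []
    for scale, ar in itertools.product(scales, aspect_ratios):
        h = base_size * scale / math.sqrt(ar)
        w = base_size * scale * math.sqrt(ar)
        shapes.append((round(h), round(w)))
    return shapes

print(grid_anchor_shapes())  # (181, 91), (128, 128), (91, 181), ... up to (362, 724)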
faster_rcnn_r50_fpn
#-*- encoding:utf-8 -*-
# Author: mengli.cml@alibaba-inc.com
# Date: 2018-06-22
# simple_rpn.config: configuration for a simple RPN (Faster R-CNN) model
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.00001
schedule {
step: 100
learning_rate: 0.001
}
schedule {
step: 90000
learning_rate: .0001
}
schedule {
step: 120000
learning_rate: .00001
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint: "pretrained_models/resnet_v1d_50/model.ckpt"
num_steps: 150000
model_dir: "pascal_resnet50_frcnn_model_fpn"
log_step_count_steps: 1
}
train_data: {
input_path: "data/voc0712_tfrecord/voc0712_part_*.tfrecord"
batch_size: 2
num_readers: 4
read_block_length: 1
shuffle: true
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_resize_to_range {
min_sizes: 600
max_sizes: 1024
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: false
}
eval_config: {
num_examples: 4952
max_evals: 1000
num_visualizations: 100
#metrics_set: 'coco_detection_metrics'
metrics_set: 'pascal_voc_detection_metrics'
metrics_set: 'pascal_voc07_detection_metrics'
}
eval_data: {
input_path: "data/voc0712_tfrecord/VOC2007_test.tfrecord"
batch_size: 1
shuffle: false
num_readers: 1
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_resize_to_range {
min_sizes: 600
max_sizes: 1024
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: true
}
export_config {
batch_size: 1
}
model_config: {
model_class: 'FasterRcnn'
faster_rcnn {
backbone {
class_name: 'resnet_v1d_50'
batchnorm_trainable: false
weight_decay: 0.0001
}
fpn {
input: 'resnet_v1d_50/block1'
input: 'resnet_v1d_50/block2'
input: 'resnet_v1d_50/block3'
input: 'resnet_v1d_50/block4'
fea_dim: 256
extra_conv_layers: 1
roi_min_level: 2
roi_max_level: 5
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
rpn_head {
# if input_layer is not specified, the FPN features will be used;
# they all have the "FPN/" prefix
box_predictor {
weight_shared_convolutional_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
depth: 512
num_layers_before_predictor: 1
kernel_size: 3
}
}
first_stage_minibatch_size: 256
first_stage_positive_balance_fraction: 0.5
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
rpn_min_size: 16
first_stage_anchor_generator {
# anchor_size = anchor_scale * feature_map_stride
multiscale_anchor_generator {
min_level: 2
max_level: 6
anchor_scale: 8
aspect_ratios: 0.5
aspect_ratios: 1
aspect_ratios: 2
normalize_coordinates: false
scales_per_octave: 1
}
}
}
rcnn_head {
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 20
second_stage_box_predictor {
mask_rcnn_box_predictor {
depth: 1024
num_layers_before_predictor: 2
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
xavier_initializer {
}
}
}
agnostic: true
}
}
nms_config {
score_threshold: 0.0
iou_threshold: 0.3
max_detections_per_class: 400
max_total_detections: 400
}
second_stage_batch_size: 128
second_stage_balance_fraction: 0.25
}
}
}
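As the comment in first_stage_anchor_generator says, anchor_size = anchor_scale * feature_map_stride. With anchor_scale 8 over FPN levels 2-6 (strides 4, 8, 16, 32, 64), the base anchor sizes are therefore 32, 64, 128, 256 and 512 pixels, each combined with the three aspect ratios. A quick sketch:

def multiscale_anchor_sizes(min_level=2, max_level=6, anchor_scale=8, scales_per_octave=1):
    # base anchor size per FPN level: anchor_scale * stride, with optional intermediate octave scales
    sizes = {}
    for level in range(min_level, max_level + 1):
        stride = 2 ** level
        sizes[level] = [anchor_scale * stride * 2 ** (i / scales_per_octave)
                        for i in range(scales_per_octave)]
    return sizes

print(multiscale_anchor_sizes())  # {2: [32.0], 3: [64.0], 4: [128.0], 5: [256.0], 6: [512.0]}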
ssd_r50
##-*- encoding:utf-8 -*-
# Author: wenmeng.zwm@alibaba-inc.com
# Date: 2018-08-22
#
#
#
# SSD with Resnet50 configuration for VOC Dataset.
# Users should configure the fine_tune_checkpoint field in train_config as
# well as the label_map_path and input_path fields in train_data and eval_data.
model_config {
model_class: 'SSD'
ssd {
backbone {
class_name: "resnet_v1d_50"
output_stride: 16
}
ssd_head {
num_classes: 20
ssd_featuremap_layout {
from_layer: 'resnet_v1d_50/block3'
from_layer: 'resnet_v1d_50/block4'
from_layer: ''
from_layer: ''
from_layer: ''
from_layer: ''
layer_depth: -1
layer_depth: -1
layer_depth: 512
layer_depth: 512
layer_depth: 256
layer_depth: 256
}
#min_depth: 16
#depth_multiplier: 1.0
conv_hyperparams {
activation: RELU,
regularizer {
l2_regularizer {
weight: 0.0005
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
#min_scale: 0.2
#max_scale: 0.9
# use the caffe anchor scales; the last one is [0.88, 1.0]
scales: 0.1
scales: 0.2
scales: 0.37
scales: 0.54
scales: 0.71
scales: 0.88
scales: 1.0
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
reduce_boxes_in_larger_layers: true
interpolate_in_all_layers: true
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
kernel_size: 3
box_code_size: 4
conv_hyperparams {
#activation: RELU_6,
activation: NONE,
regularizer {
l2_regularizer {
weight: 0.0005
}
}
initializer {
#truncated_normal_initializer {
# stddev: 0.03
# mean: 0.0
#}
xavier_initializer {
uniform : false
}
}
}
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.45
max_detections_per_class: 100
max_total_detections: 200
}
score_converter: SOFTMAX
}
normalize_loss_by_num_matches: true
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: BOTH
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 3.0
localization_weight: 1.0
}
}
}
}
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.001
schedule {
step: 80000
learning_rate: 0.0001
}
schedule {
step: 100000
learning_rate: 0.00001
}
}
}
momentum_optimizer_value: 0.9
}
}
#gradient_clipping_by_norm : 10.0
fine_tune_checkpoint: "pretrained_models/resnet_v1d_50/model.ckpt"
num_steps: 120000
model_dir: 'experiments/ssd_resnet50/train'
}
train_data: {
input_path: "data/voc0712_tfrecord/VOC2007_train.tfrecord"
input_path: "data/voc0712_tfrecord/VOC2012_train.tfrecord"
input_path: "data/voc0712_tfrecord/VOC2007_val.tfrecord"
input_path: "data/voc0712_tfrecord/VOC2012_val.tfrecord"
batch_size: 32
num_readers: 4
shuffle: true
read_block_length : 32
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
# data augmentation
data_augmentation_options {
ssd_random_crop {
}
}
data_augmentation_options {
random_adjust_brightness {
max_delta:0.125
}
}
data_augmentation_options {
random_adjust_contrast {
min_delta : 0.5
max_delta : 1.5
}
}
data_augmentation_options {
random_adjust_hue {
max_delta : 0.046875
}
}
data_augmentation_options {
random_adjust_saturation {
min_delta : 0.5
max_delta : 1.5
}
}
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
resize_image {
new_height: 300
new_width: 300
method: BILINEAR
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_pascal.py#L177
means: 123.68
means: 116.779
means: 103.939
}
}
}
eval_config: {
num_examples: 4952
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
# max_evals: 10
metrics_set : 'pascal_voc07_detection_metrics'
#metrics_set : 'coco_detection_metrics'
visualize_groundtruth_boxes : true
# num of visualizations to be displayed on tensorboard
num_visualizations : 10
# all the evaluation results will be saved to this dir if not ''
visualization_export_dir: ''
max_num_boxes_to_visualize: 20
min_score_threshold: 0.5
}
eval_data : {
input_path: "data/voc0712_tfrecord/VOC2007_test.tfrecord"
batch_size: 1
shuffle: false
num_readers: 1
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
data_augmentation_options {
resize_image {
new_height: 300
new_width: 300
method: BILINEAR
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_pascal.py#L177
means: 123.68
means: 116.779
means: 103.939
}
}
}
export_config {
batch_size: 1
}
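faster_rcnn_box_coder is the standard Faster R-CNN box parameterization; the y/x/height/width scales (10, 10, 5, 5) simply multiply the regression targets. A sketch of the encode step for one box/anchor pair, with boxes given as (center_y, center_x, height, width):

import math

def encode_box(box, anchor, y_scale=10.0, x_scale=10.0, height_scale=5.0, width_scale=5.0):
    # returns the regression target (ty, tx, th, tw) the network is trained to predict
    cy, cx, h, w = box
    acy, acx, ah, aw = anchor
    ty = (cy - acy) / ah * y_scale
    tx = (cx - acx) / aw * x_scale
    th = math.log(h / ah) * height_scale
    tw = math.log(w / aw) * width_scale
    return ty, tx, th, tw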
ssd_r50_fpn
##-*- encoding:utf-8 -*-
# Author: wenmeng.zwm@alibaba-inc.com
# Date: 2018-08-22
#
#
#
# SSD with Resnet50 configuration for VOC Dataset.
# Users should configure the fine_tune_checkpoint field in train_config as
# well as the label_map_path and input_path fields in train_data and eval_data.
model_config {
model_class: 'SSD'
ssd {
backbone {
class_name: "resnet_v1d_50"
}
ssd_head {
num_classes: 20
fpn_featuremap_layout {
from_layer: 'resnet_v1d_50/block2'
from_layer: 'resnet_v1d_50/block3'
from_layer: 'resnet_v1d_50/block4'
layer_depth: 256
extra_conv_layers: 2
}
conv_hyperparams {
activation: RELU,
regularizer {
l2_regularizer {
weight: 0.0005
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
multiscale_anchor_generator{
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [0.5, 1.0, 2.0, 0.333, 3.0]
scales_per_octave: 2
normalize_coordinates: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor{
depth: 256
num_layers_before_predictor: 4
kernel_size: 3
box_code_size: 4
conv_hyperparams {
#activation: RELU_6,
activation: NONE,
regularizer {
l2_regularizer {
weight: 0.0005
}
}
initializer {
xavier_initializer {
uniform : false
}
}
batch_norm {
scale: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.45
max_detections_per_class: 100
max_total_detections: 200
}
score_converter: SOFTMAX
}
normalize_loss_by_num_matches: true
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: BOTH
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 3.0
localization_weight: 1.0
}
}
}
}
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.001
schedule {
step: 80000
learning_rate: 0.0001
}
schedule {
step: 100000
learning_rate: 0.00001
}
}
}
momentum_optimizer_value: 0.9
}
}
#gradient_clipping_by_norm : 10.0
fine_tune_checkpoint: "pretrained_models/resnet_v1d_50/model.ckpt"
num_steps: 120000
model_dir: 'experiments/ssd_resnet50_fpn'
}
train_data: {
input_path: "data/voc0712_tfrecord/VOC2007_train.tfrecord"
input_path: "data/voc0712_tfrecord/VOC2012_train.tfrecord"
input_path: "data/voc0712_tfrecord/VOC2007_val.tfrecord"
input_path: "data/voc0712_tfrecord/VOC2012_val.tfrecord"
batch_size: 32
num_readers: 4
shuffle: true
read_block_length : 32
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
# data augmentation
data_augmentation_options {
ssd_random_crop {
}
}
data_augmentation_options {
random_adjust_brightness {
max_delta:0.125
}
}
data_augmentation_options {
random_adjust_contrast {
min_delta : 0.5
max_delta : 1.5
}
}
data_augmentation_options {
random_adjust_hue {
max_delta : 0.046875
}
}
data_augmentation_options {
random_adjust_saturation {
min_delta : 0.5
max_delta : 1.5
}
}
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
resize_image {
new_height: 300
new_width: 300
method: BILINEAR
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_pascal.py#L177
means: 123.68
means: 116.779
means: 103.939
}
}
}
eval_config: {
num_examples: 4952
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
# max_evals: 10
metrics_set : 'pascal_voc07_detection_metrics'
#metrics_set : 'coco_detection_metrics'
visualize_groundtruth_boxes : true
# num of visualizations to be displayed on tensorboard
num_visualizations : 10
# all the evaluation results will be saved to this dir if not ''
visualization_export_dir: ''
max_num_boxes_to_visualize: 20
min_score_threshold: 0.5
}
eval_data : {
input_path: "data/voc0712_tfrecord/VOC2007_test.tfrecord"
batch_size: 1
shuffle: false
num_readers: 1
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
data_augmentation_options {
resize_image {
new_height: 300
new_width: 300
method: BILINEAR
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_pascal.py#L177
means: 123.68
means: 116.779
means: 103.939
}
}
}
export_config {
batch_size: 1
}
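The argmax_matcher above assigns every anchor to its best-overlapping ground-truth box: anchors with IoU at or above matched_threshold become positives, anchors below unmatched_threshold become negatives (with both thresholds at 0.5 there is no ignored band), and force_match_for_each_row guarantees every ground-truth box claims at least one anchor. A simplified numpy sketch of that logic, not the library implementation:

import numpy as np

def argmax_match(iou, matched_threshold=0.5, unmatched_threshold=0.5, force_match_for_each_row=True):
    # iou: [num_gt, num_anchors]; returns per-anchor gt index, -1 = negative, -2 = ignored
    matches = iou.argmax(axis=0)
    best_iou = iou.max(axis=0)
    matches[best_iou < matched_threshold] = -2    # ignore band (empty here: both thresholds are 0.5)
    matches[best_iou < unmatched_threshold] = -1  # negatives
    if force_match_for_each_row:
        # every ground-truth box claims its best anchor, even if the IoU is low
        best_anchor_per_gt = iou.argmax(axis=1)
        matches[best_anchor_per_gt] = np.arange(iou.shape[0])
    return matches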
ssd_mobilenet
yolo3
##-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2020-06-01
# yolo3.config: configuration for a YOLOv3 model
model_config {
model_class: 'YOLO3'
yolo {
backbone {
class_name: "darknet53"
}
yolo_head {
num_classes: 20
yolo_featuremap_layout {
from_layer: 'conv4_res'
from_layer: 'conv5_res'
from_layer: 'conv6'
}
conv_hyperparams {
activation: LEAKY_RELU
regularizer {
l2_regularizer {
weight: 0.0005
}
}
initializer {
xavier_initializer {
uniform : true
}
}
batch_norm {
decay: 0.997
center: true
scale: true
epsilon: 1e-05
}
}
anchor_generator {
yolo_anchor_generator {
anchor_group {
anchor_size {
width: 10
height: 13
}
anchor_size {
width: 16
height: 30
}
anchor_size {
width: 33
height: 23
}
}
anchor_group {
anchor_size {
width: 30
height: 61
}
anchor_size {
width: 62
height: 45
}
anchor_size {
width: 59
height: 119
}
}
anchor_group {
anchor_size {
width: 116
height: 90
}
anchor_size {
width: 156
height: 198
}
anchor_size {
width: 373
height: 326
}
}
}
}
box_predictor {
yolo_box_predictor {
conv_hyperparams {
activation: LEAKY_RELU
regularizer {
l2_regularizer {
weight: 0.0005
}
}
initializer {
xavier_initializer {
uniform : true
}
}
batch_norm {
decay: 0.997
center: true
scale: true
epsilon: 1e-05
}
}
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.45
max_detections_per_class: 200
max_total_detections: 200
}
score_converter: SIGMOID
}
ignore_threshold: 0.5
}
}
}
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0000001
schedule {
step: 1000
learning_rate: 0.001
}
schedule {
step: 40000
learning_rate: 0.0001
}
schedule {
step: 45000
learning_rate: 0.00001
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
}
#gradient_clipping_by_norm : 10.0
num_steps: 50200
fine_tune_checkpoint: "pretrained_models/darknet53/model.ckpt"
model_dir: 'experiments/yolo/output/yolo3_voc'
summary_model_vars: false
sync_replicas: false
train_distribute: "mirrored"
num_gpus_per_worker: 8
}
train_data: {
input_path: "data/voc0712_tfrecord/voc0712_part_*.tfrecord"
batch_size: 8 #16
num_readers: 4
shuffle: true
read_block_length : 32
bucket_sizes: 10
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_pad_image {
max_height_ratio: 1.6
max_width_ratio: 1.6
}
}
data_augmentation_options {
random_crop_image {
min_aspect_ratio: 0.25
max_aspect_ratio: 4.0
min_area: 0.1
max_area: 1.0
}
}
data_augmentation_options {
normalize_image {
original_minval: 0.0
original_maxval: 255.0
target_minval: 0.0
target_maxval: 1.0
}
}
data_augmentation_options {
random_resize_image {
new_heights: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
new_widths: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
method: BICUBIC
}
}
}
eval_config: {
num_examples: 4952
metrics_set : 'pascal_voc07_detection_metrics'
visualize_groundtruth_boxes : true
# num of visualizations to be displayed on tensorboard
num_visualizations : 32
# all the evaluation results will be saved to this dir if not ''
visualization_export_dir: ''
max_num_boxes_to_visualize: 32
min_score_threshold: 0.5
matching_iou_threshold: 0.5
}
eval_data : {
input_path: "data/voc0712_tfrecord/VOC2007_test.tfrecord"
batch_size: 1
shuffle: false
num_readers: 1
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
data_augmentation_options {
normalize_image {
original_minval: 0.0
original_maxval: 255.0
target_minval: 0.0
target_maxval: 1.0
}
}
data_augmentation_options {
resize_image {
new_height: 416
new_width: 416
method: BICUBIC
}
}
}
export_config {
batch_size: 1
}
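The three anchor_group blocks are the standard YOLOv3 anchors; each group is tied to one entry of yolo_featuremap_layout, with the smallest anchors used on the highest-resolution feature map. A sketch of how YOLOv3 decodes a raw prediction (tx, ty, tw, th) into a box, assuming the usual formulation (sigmoid offsets within the grid cell, exponential scaling of the matching anchor); the library's exact decode may differ in details:

import math

def decode_yolo_box(tx, ty, tw, th, cell_x, cell_y, anchor_w, anchor_h, stride):
    sigmoid = lambda v: 1.0 / (1.0 + math.exp(-v))
    bx = (cell_x + sigmoid(tx)) * stride   # box center x in input-image pixels
    by = (cell_y + sigmoid(ty)) * stride   # box center y in input-image pixels
    bw = anchor_w * math.exp(tw)           # anchor width/height are given in pixels in the config
    bh = anchor_h * math.exp(th)
    return bx, by, bw, bh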
Image Segmentation
deeplab_v3+_r101_stage1
#-*- encoding:utf-8 -*-
# Author: mengli.cml@alibaba-inc.com
# Date: 2018-06-22
# stage 1 of the DeepLab configuration
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
poly_decay_learning_rate {
learning_rate_base: 0.007
total_steps: 30000
power: 0.9
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
save_checkpoints_steps: 5000
#fine_tune_checkpoint: "xception/model.ckpt"
fine_tune_checkpoint: "pretrained_models/resnet_v1d_101/model.ckpt"
num_steps: 30000
model_dir: "pascal_deeplab_model"
}
train_data: {
input_path: "data/pascal_voc_seg_aug/voc_ev_train.tfrecord"
batch_size: 6
num_readers: 4
read_block_length: 1
shuffle: true
seg_decoder_config { }
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
data_augmentation_options {
deeplab_random_crop {
crop_size: 513
}
}
data_augmentation_options {
deeplab_random_horizontal_flip {
}
}
}
eval_config: {
# num_examples: 100
max_evals: 1000
num_visualizations: 100
}
eval_data: {
input_path: "data/pascal_voc_seg_aug/voc_ev_val.tfrecord"
batch_size: 1
shuffle: false
num_readers: 1
seg_decoder_config {
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
num_epochs: 1
}
model_config: {
model_class: 'DeepLab'
deeplab {
backbone {
# class_name: 'xception_65'
class_name: 'resnet_v1d_101'
batchnorm_trainable: true
weight_decay: 0.0005
output_stride: 16
}
aspp_input_layer: 'resnet_v1d_101/block4'
aspp_block {
image_level_features: true
batchnorm_trainable: true
weight_decay: 1e-5
feature_depth: 256
atrous_rates: 6
atrous_rates: 12
atrous_rates: 18
keep_prob: 0.9
}
seg_decoder_head {
weight_decay: 1e-5
batchnorm_trainable: true
# input_layer: 'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_pointwise'
input_layer: 'resnet_v1d_101/block1'
decoder_depth: 256
output_stride: 4
num_classes: 21
}
}
}
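poly_decay_learning_rate is the polynomial ("poly") schedule commonly used with DeepLab: the rate decays from learning_rate_base towards zero over total_steps with the configured power. A sketch:

def poly_decay_lr(step, learning_rate_base=0.007, total_steps=30000, power=0.9):
    # lr = base * (1 - step / total_steps) ** power
    progress = min(step, total_steps) / total_steps
    return learning_rate_base * (1.0 - progress) ** power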
deeplab_v3+_r101_stage2
#-*- encoding:utf-8 -*-
# Author: mengli.cml@alibaba-inc.com
# Date: 2018-06-22
# stage 2 of the DeepLab configuration
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
poly_decay_learning_rate {
learning_rate_base: 0.0002
total_steps: 30000
power: 0.9
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
save_checkpoints_steps: 5000
fine_tune_checkpoint: "pascal_deeplab_model/model.ckpt-30000"
num_steps: 30000
model_dir: "pascal_deeplab_model_finetune"
}
train_data: {
input_path: "pascal_voc_seg/train-00000-of-00004.tfrecord"
input_path: "pascal_voc_seg/train-00001-of-00004.tfrecord"
input_path: "pascal_voc_seg/train-00002-of-00004.tfrecord"
input_path: "pascal_voc_seg/train-00003-of-00004.tfrecord"
batch_size: 2
num_readers: 4
read_block_length: 1
shuffle: true
seg_decoder_config { }
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
data_augmentation_options {
deeplab_random_crop {
crop_size: 513
}
}
data_augmentation_options {
deeplab_random_horizontal_flip {
}
}
}
eval_config: {
# num_examples: 100
max_evals: 1000
num_visualizations: 100
}
eval_data: {
input_path: "pascal_voc_seg/val-00000-of-00004.tfrecord"
input_path: "pascal_voc_seg/val-00001-of-00004.tfrecord"
input_path: "pascal_voc_seg/val-00002-of-00004.tfrecord"
input_path: "pascal_voc_seg/val-00003-of-00004.tfrecord"
batch_size: 1
shuffle: false
num_readers: 1
seg_decoder_config {
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
num_epochs: 1
}
model_config: {
model_class: 'DeepLab'
deeplab {
backbone {
# class_name: 'xception_65'
class_name: 'resnet_v1d_101'
batchnorm_trainable: false
weight_decay: 0.0005
output_stride: 8
}
aspp_input_layer: 'resnet_v1d_101/block4'
aspp_block {
image_level_features: true
batchnorm_trainable: false
weight_decay: 1e-5
feature_depth: 256
atrous_rates: 12
atrous_rates: 24
atrous_rates: 36
keep_prob: 0.9
}
seg_decoder_head {
weight_decay: 1e-5
batchnorm_trainable: false
# input_layer: 'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_pointwise'
input_layer: 'resnet_v1d_101/block1'
decoder_depth: 256
output_stride: 4
num_classes: 21
}
}
}
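Relative to stage 1, this stage halves the backbone output_stride (16 -> 8) and doubles the ASPP atrous_rates (6/12/18 -> 12/24/36), so each ASPP branch keeps roughly the same receptive field on the input image while the features become finer. A small illustration of that relationship:

def aspp_rates(output_stride, base_rates=(6, 12, 18), base_output_stride=16):
    # atrous rates scale inversely with output_stride to preserve the field of view
    factor = base_output_stride // output_stride
    return tuple(rate * factor for rate in base_rates)

# aspp_rates(16) -> (6, 12, 18); aspp_rates(8) -> (12, 24, 36)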
Instance Segmentation
mask_rcnn_r50
#-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-01-16
# mask_rcnn.config: mscoco mask rcnn model config
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.00002
schedule {
step: 100
learning_rate: 0.001
}
schedule {
step: 240000
learning_rate: .0001
}
schedule {
step: 320000
learning_rate: .00001
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint: "pretrained_models/resnet_v1d_50/model.ckpt"
num_steps: 360000
model_dir: "experiments/coco_resnet50_maskrcnn_model"
}
train_data: {
input_path: "data/coco_wmask/coco_train_*.tfrecord"
batch_size: 1
num_readers: 8
read_block_length: 1
shuffle: true
shuffle_buffer_size: 512
prefetch_size: 256
voc_decoder_config {
label_map_path: "data/coco_wmask/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_format: PNG_MASK_FORMAT
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_resize_to_range {
min_sizes: [640, 672, 704, 736, 768, 800]
max_sizes: [1333, 1333, 1333, 1333, 1333, 1333]
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: false
}
eval_config: {
num_examples: 5000
num_visualizations: 16
metrics_set: 'coco_detection_metrics'
metrics_set: 'coco_mask_metrics'
visualize_groundtruth_boxes: true
}
eval_data: {
input_path: "data/coco_wmask/coco_val.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 256
num_readers: 1
voc_decoder_config {
label_map_path: "data/mscoco/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_format: PNG_MASK_FORMAT
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_resize_to_range {
min_sizes: 800
max_sizes: 1333
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: true
}
export_config {
batch_size: 1
}
model_config: {
model_class: 'FasterRcnn'
faster_rcnn {
backbone {
class_name: 'resnet_v1d_50'
batchnorm_trainable: false
weight_decay: 0.0001
output_stride: 16
}
rpn_head {
input_layer: 'resnet_v1d_50/block3'
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
min_depth: 512
max_depth: 512
num_layers_before_predictor: 1
kernel_size: 3
}
}
first_stage_minibatch_size: 256
first_stage_positive_balance_fraction: 0.5
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 2000
rpn_min_size: 0
first_stage_anchor_generator {
# the default base anchor size is 256
grid_anchor_generator {
scales: [0.125, 0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
}
region_feature_extractor {
resnet_block {
class_name: 'resnet_v1d_50' # the name of the backbone
block_name: 'block4' # the last residual block of resnet_v1d_50
stride: 1
weight_decay: 0.0001
}
}
rcnn_head {
input_layer: 'resnet_v1d_50/block3'
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 90
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
xavier_initializer {
}
}
}
agnostic: true
}
}
nms_config {
score_threshold: 0.05
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
second_stage_batch_size: 512
second_stage_balance_fraction: 0.25
}
mrcnn_head {
input_layer: 'resnet_v1d_50/block3'
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 90
third_stage_mask_predictor {
mask_rcnn_mask_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
variance_scaling_initializer {
}
}
}
mask_height: 14
mask_width: 14
mask_prediction_conv_depth: 256
mask_prediction_num_conv_layers: 1
convolve_then_upsample_masks: true
}
}
}
}
}
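second_stage_batch_size and second_stage_balance_fraction control the ROI sample fed to the box and mask heads: 512 * 0.25 = at most 128 positive proposals per image, with negatives filling the rest. A simplified sketch of that split; the real sampler also handles images with fewer positives than the quota, which is what min() covers below:

def sample_rois(second_stage_batch_size=512, balance_fraction=0.25, num_positives_available=40):
    # returns (num_positive, num_negative) proposals sampled for one image
    positive_quota = int(second_stage_batch_size * balance_fraction)   # 128 here
    num_positive = min(positive_quota, num_positives_available)
    num_negative = second_stage_batch_size - num_positive
    return num_positive, num_negative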
mask_rcnn_r50_fpn
#-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-01-16
# mask_rcnn_fpn.config: coco_wmask mask rcnn fpn model config
train_config: {
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.00002
schedule {
step: 100
learning_rate: 0.001
}
schedule {
step: 240000
learning_rate: .0001
}
schedule {
step: 320000
learning_rate: .00001
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint: "pretrained_models/resnet_v1d_50/model.ckpt"
num_steps: 360000
model_dir: "experiments/coco_resnet50_maskrcnn_model_fpn"
}
train_data: {
input_path: "data/coco_wmask/coco_train_*.tfrecord"
batch_size: 1
num_readers: 8
read_block_length: 1
shuffle: true
shuffle_buffer_size: 512
prefetch_size: 256
voc_decoder_config {
label_map_path: "data/coco_wmask/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_format: PNG_MASK_FORMAT
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_resize_to_range {
min_sizes: [640, 672, 704, 736, 768, 800]
max_sizes: [1333, 1333, 1333, 1333, 1333, 1333]
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: false
}
eval_config: {
num_examples: 5000
num_visualizations: 16
metrics_set: 'coco_detection_metrics'
metrics_set: 'coco_mask_metrics'
visualize_groundtruth_boxes: true
}
eval_data: {
input_path: "data/coco_wmask/coco_val.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 256
num_readers: 1
voc_decoder_config {
label_map_path: "data/coco_wmask/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_format: PNG_MASK_FORMAT
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_resize_to_range {
min_sizes: 800
max_sizes: 1333
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: true
}
export_config {
batch_size: 1
}
model_config: {
model_class: 'FasterRcnn'
faster_rcnn {
backbone {
class_name: 'resnet_v1d_50'
batchnorm_trainable: false
weight_decay: 0.0001
}
fpn {
input: 'resnet_v1d_50/block1'
input: 'resnet_v1d_50/block2'
input: 'resnet_v1d_50/block3'
input: 'resnet_v1d_50/block4'
fea_dim: 256
extra_conv_layers: 1
roi_min_level: 2
roi_max_level: 5
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
rpn_head {
# if input_layer is not specified, the FPN features will be used;
# they all have the "FPN/" prefix
box_predictor {
weight_shared_convolutional_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
depth: 256
num_layers_before_predictor: 1
kernel_size: 3
}
}
first_stage_minibatch_size: 256
first_stage_positive_balance_fraction: 0.5
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 2000
rpn_min_size: 16
first_stage_anchor_generator {
# anchor_size = anchor_scale * feature_map_stride
multiscale_anchor_generator {
min_level: 2
max_level: 6
anchor_scale: 8
aspect_ratios: 0.5
aspect_ratios: 1
aspect_ratios: 2
normalize_coordinates: false
scales_per_octave: 1
}
}
}
rcnn_head {
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 90
second_stage_box_predictor {
mask_rcnn_box_predictor {
num_layers_before_predictor: 2
depth: 1024
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
xavier_initializer {
}
}
}
agnostic: true
}
}
nms_config {
score_threshold: 0.05
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
second_stage_batch_size: 512
second_stage_balance_fraction: 0.25
}
mrcnn_head {
initial_crop_size: 28
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 90
third_stage_mask_predictor {
mask_rcnn_mask_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
variance_scaling_initializer {
}
}
}
mask_height: 28
mask_width: 28
mask_prediction_conv_depth: 256
mask_prediction_num_conv_layers: 5
convolve_then_upsample_masks: true
}
}
}
}
}
Text Detection
text_krcnn_r50_fpn
#-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-04-27
# text_krcnn_resnet50.config:
# icdar text krcnn model training config
train_config: {
optimizer {
adam_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.00001
decay_steps: 150000
decay_factor: 0.5
min_learning_rate: 0.0000001
}
}
}
use_moving_average: false
}
# gradient_clipping_by_norm: 0.0
fine_tune_checkpoint: "pretrained_models/resnet_v1d_50/model.ckpt"
num_steps: 400000
model_dir: "experiments/icdar_ch4/text_krcnn_resnet50_fpn"
save_checkpoints_steps: 2000
save_summary_steps: 100
log_step_count_steps: 100
summary_model_vars: false
}
train_data: {
input_path: "data/icdar_detection_tfrecords/icdar_training_*.tfrecord"
batch_size: 1
shuffle: true
shuffle_buffer_size: 64
prefetch_size: 64
num_readers: 8
text_detection_decoder_config {
label_map_path: "data/icdar_detection_tfrecords/label_map.pbtxt"
}
data_augmentation_options {
random_jitter_aspect_ratio {
min_jitter_coef: 0.8
max_jitter_coef: 1.2
}
}
data_augmentation_options {
random_rotation {
min_angle: -10
max_angle: 10
use_keypoints_calc_boxes: true
}
}
data_augmentation_options {
random_resize_to_range {
min_sizes: 640
max_sizes: 2000
min_sizes: 800
max_sizes: 2000
min_sizes: 960
max_sizes: 2000
min_sizes: 1120
max_sizes: 2000
}
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: false
}
eval_config: {
num_examples: 500
num_visualizations : 16
metrics_set: "icdar_detection_metrics"
visualization_export_dir: ''
}
eval_data: {
input_path: "data/icdar_detection_tfrecords/icdar-ch4-test.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 32
text_detection_decoder_config {
label_map_path: "data/icdar_detection_tfrecords/label_map.pbtxt"
}
# note the augmentation order is important, so it cannot be changed
data_augmentation_options {
random_resize_to_range {
min_sizes: 960
max_sizes: 2000
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: true
}
export_config {
batch_size: 1
}
model_config: {
model_class: 'TextKRCNN'
text_krcnn {
backbone {
class_name: 'resnet_v1d_50'
batchnorm_trainable: false
weight_decay: 0.0001
}
fpn {
input: 'resnet_v1d_50/block1'
input: 'resnet_v1d_50/block2'
input: 'resnet_v1d_50/block3'
input: 'resnet_v1d_50/block4'
fea_dim: 256
extra_conv_layers: 1
roi_min_level: 2
roi_max_level: 5
roi_canonical_scale: 168
roi_canonical_level: 4
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
rpn_head {
# if input_layer is not specified, the FPN features will be used;
# they all have the "FPN/" prefix
box_predictor {
weight_shared_convolutional_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
depth: 256
num_layers_before_predictor: 1
kernel_size: 3
}
}
first_stage_minibatch_size: 256
first_stage_positive_balance_fraction: 0.5
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
rpn_min_size: 8
first_stage_anchor_generator {
multiscale_anchor_generator {
min_level: 2
max_level: 6
anchor_scale: 6
aspect_ratios: [0.2, 0.5, 1, 2, 5]
normalize_coordinates: false
scales_per_octave: 1
}
}
}
rcnn_head {
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
num_layers_before_predictor: 2
depth: 1024
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
agnostic: true
}
}
hard_example_miner {
num_hard_examples: 128
iou_threshold: 0.99
loss_type: BOTH
}
nms_config {
score_threshold: 0.7
iou_threshold: 0.3
max_detections_per_class: 400
max_total_detections: 400
}
second_stage_batch_size: 128
second_stage_balance_fraction: 0.25
}
keypoint_head {
keypoint_predictor {
text_resnet_keypoint_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
variance_scaling_initializer {
}
}
}
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
variance_scaling_initializer {
}
}
}
}
}
initial_crop_size: 28
maxpool_kernel_size: 2
maxpool_stride: 2
num_keypoints: 4
predict_direction: false
direction_trainable: false
}
}
}
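roi_canonical_scale and roi_canonical_level implement the FPN paper's ROI-to-level assignment: a proposal whose scale equals roi_canonical_scale (168 px here) is pooled from roi_canonical_level (P4), and proposals one octave larger or smaller move one level up or down, clamped to [roi_min_level, roi_max_level]. A sketch of that rule:

import math

def roi_fpn_level(box_height, box_width, canonical_scale=168, canonical_level=4, min_level=2, max_level=5):
    # k = floor(k0 + log2(sqrt(h * w) / canonical_scale)), clamped to the available levels
    level = math.floor(canonical_level + math.log2(math.sqrt(box_height * box_width) / canonical_scale))
    return max(min_level, min(max_level, level))

# roi_fpn_level(168, 168) -> 4; roi_fpn_level(40, 40) -> 2; roi_fpn_level(600, 600) -> 5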
Text Recognition
crnn_ctc_r15
##-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-02-28
# text crnn ctc config for receipt text
train_config: {
optimizer {
adam_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.0001
decay_steps: 30000
decay_factor: 0.7
}
}
}
use_moving_average: false
}
# gradient_clipping_by_norm : 10.0
num_steps: 1000000
model_dir: 'experiments/recipt_text/crnn_ctc_resnet15_fixed_height_wopretrain'
save_checkpoints_steps: 2000
save_summary_steps: 100
log_step_count_steps: 100
summary_model_vars: false
# for distributed training only
# sync_replicas: false
# replicas_to_aggregate: 8
# num_worker_replicas: 8
}
train_data: {
input_path: "data/recipt_text/recognition_tfrecords/train_*.tfrecord"
batch_size: 64
shuffle: true
num_readers: 8
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict"
min_input_ratio: 0.125
max_input_ratio: 38
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 32
}
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: 0
target_maxval: 1
}
}
}
eval_config: {
num_visualizations : 16
}
eval_data: {
input_path: "data/recipt_text/recognition_tfrecords/test.tfrecord"
batch_size: 64
shuffle: false
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict"
min_input_ratio: 0.125
max_input_ratio: 100
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 32
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: 0
target_maxval: 1
}
}
num_epochs: 1
}
export_config {
batch_size: -1
}
model_config {
model_class: 'TextRecognition'
text_recognition {
backbone {
class_name: 'text_resnet15'
}
ctc_head {
input_layer: 'text_resnet15/conv5_0'
crnn_encoder {
num_layers: 2
basic_lstm {
num_units: 512
}
encoder_type: UNI
}
ctc_decoder {
}
}
}
}
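ctc_head pairs a CRNN encoder (two unidirectional LSTM layers of 512 units over the conv5_0 feature sequence) with CTC decoding: the decoder collapses repeated labels and drops the blank symbol. A minimal greedy CTC decode sketch; beam search is also common, this only shows the greedy variant:

def ctc_greedy_decode(per_step_probs, blank=0):
    # per_step_probs: [T][num_labels]; pick the best label per step, collapse repeats, drop blanks
    best = [max(range(len(p)), key=p.__getitem__) for p in per_step_probs]
    decoded, previous = [], None
    for label in best:
        if label != previous and label != blank:
            decoded.append(label)
        previous = label
    return decoded

# e.g. best path [0, 7, 7, 0, 3] (0 = blank) decodes to [7, 3]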
crnn_attention_r15
##-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-03-20
# text recognition config for receipt text
train_config: {
optimizer {
adam_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.0001
decay_steps: 30000
decay_factor: 0.7
}
}
}
use_moving_average: false
}
# gradient_clipping_by_norm : 10.0
num_steps: 1000000
fine_tune_checkpoint: "pretrained_models/general_crnn_attn_resnet15/model.ckpt"
model_dir: 'experiments/recipt_text/crnn_attn_resnet15_fixed_height'
save_checkpoints_steps: 2000
save_summary_steps: 100
log_step_count_steps: 100
summary_model_vars: false
# for distributed training only
# sync_replicas: false
# replicas_to_aggregate: 8
# num_worker_replicas: 8
}
train_data: {
input_path: "data/recipt_text/recognition_tfrecords/train_*.tfrecord"
batch_size: 64
shuffle: true
num_readers: 8
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict"
min_input_ratio: 0.125
max_input_ratio: 38
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 32
}
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: 0
target_maxval: 1
}
}
}
eval_config: {
num_visualizations : 16
}
eval_data: {
input_path: "data/recipt_text/recognition_tfrecords/test.tfrecord"
batch_size: 64
shuffle: false
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict"
min_input_ratio: 0.125
max_input_ratio: 100
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 32
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: 0
target_maxval: 1
}
}
num_epochs: 1
}
export_config {
batch_size: -1
}
model_config {
model_class: 'TextRecognition'
text_recognition {
backbone {
class_name: 'text_resnet15'
}
attention_head {
input_layer: 'text_resnet15/conv5_0'
crnn_encoder {
num_layers: 2
basic_lstm {
num_units: 512
}
encoder_type: UNI
}
attention_decoder {
embedding_size: 64
num_layers: 2
basic_lstm {
num_units: 512
}
attention_mechanism: "normed_bahdanau"
# visualize_type: "line"
}
}
}
}
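attention_mechanism: "normed_bahdanau" is additive (Bahdanau) attention with weight normalization on the scoring vector. As a rough illustration of the plain additive score it is based on (the normalized variant rescales v, and the library implementation will differ in details):

import numpy as np

def bahdanau_attention(decoder_state, encoder_outputs, W_query, W_keys, v):
    # score_t = v . tanh(W_query @ s + W_keys @ h_t), softmaxed over encoder steps t
    query = W_query @ decoder_state            # [attn_dim]
    keys = encoder_outputs @ W_keys.T          # [T, attn_dim]
    scores = np.tanh(keys + query) @ v         # [T]
    weights = np.exp(scores - scores.max())
    return weights / weights.sum()             # attention weights over the encoder sequence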
crnn_mono_attention_r15
##-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2020-06-15
# text recognition config for receipt text
train_config: {
optimizer {
adam_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.0001
decay_steps: 30000
decay_factor: 0.7
}
}
}
use_moving_average: false
}
# gradient_clipping_by_norm : 10.0
num_steps: 1000000
fine_tune_checkpoint: "pretrained_models/general_crnn_mono_norm_attn_resnet15_xxlarge/model.ckpt"
model_dir: 'experiments/recipt_text/crnn_mono_norm_attn_resnet15_fixed_height_xxlarge_dict'
save_checkpoints_steps: 2000
save_summary_steps: 100
log_step_count_steps: 100
summary_model_vars: false
# for distributed training only
# sync_replicas: false
# replicas_to_aggregate: 8
# num_worker_replicas: 8
}
train_data: {
input_path: "data/recipt_text/recognition_tfrecords/train_*.tfrecord"
batch_size: 64
shuffle: true
num_readers: 8
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict_xxlarge"
min_input_ratio: 0.125
max_input_ratio: 38
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 32
}
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
random_rgb_to_gray {
probability: 0.2
}
}
data_augmentation_options {
subtract_channel_mean {
means: 123.68
means: 116.779
means: 103.939
}
}
}
eval_config: {
num_visualizations : 16
}
eval_data: {
input_path: "data/recipt_text/recognition_tfrecords/test.tfrecord"
batch_size: 64
shuffle: false
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict_xxlarge"
min_input_ratio: 0.125
max_input_ratio: 100
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 32
}
}
data_augmentation_options {
subtract_channel_mean {
means: 123.68
means: 116.779
means: 103.939
}
}
num_epochs: 1
}
export_config {
batch_size: -1
}
model_config {
model_class: 'TextRecognition'
text_recognition {
backbone {
class_name: 'text_resnet15'
batchnorm_trainable: true
weight_decay: 0.00001
}
attention_head {
input_layer: 'text_resnet15/conv5_0'
crnn_encoder {
num_layers: 2
layer_norm_basic_lstm {
num_units: 512
}
encoder_type: UNI
}
attention_decoder {
embedding_size: 256
num_layers: 2
layer_norm_basic_lstm {
num_units: 512
}
attention_mechanism: "monotonic_normed_bahdanau"
# visualize_type: "line"
}
}
}
}
cnn_spatial_attention_r15
##-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2020-04-24
# text cnn spatial attention config for receipt text
train_config: {
optimizer {
adam_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.0001
decay_steps: 30000
decay_factor: 0.7
}
}
}
use_moving_average: false
}
# gradient_clipping_by_norm : 10.0
num_steps: 1000000
model_dir: 'experiments/recipt_text/cnn_spatial_attn_resnet15_fixed_height_wopretrain'
save_checkpoints_steps: 2000
save_summary_steps: 100
log_step_count_steps: 100
summary_model_vars: false
# for distributed training only
# sync_replicas: false
# replicas_to_aggregate: 8
# num_worker_replicas: 8
}
train_data: {
input_path: "data/recipt_text/recognition_tfrecords/train_*.tfrecord"
batch_size: 24
shuffle: true
num_readers: 8
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict"
min_input_ratio: 0.125
max_input_ratio: 38
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 64
}
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: 0
target_maxval: 1
}
}
}
eval_config: {
num_visualizations : 16
}
eval_data: {
input_path: "data/recipt_text/recognition_tfrecords/test.tfrecord"
batch_size: 24
shuffle: false
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict"
min_input_ratio: 0.125
max_input_ratio: 100
num_buckets: 10
}
data_augmentation_options {
resize_image_with_fixed_height {
new_height: 64
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: 0
target_maxval: 1
}
}
num_epochs: 1
}
export_config {
batch_size: -1
}
model_config {
model_class: 'TextRecognition'
text_recognition {
backbone {
class_name: 'text_resnet15'
}
attention_head {
input_layer: 'text_resnet15/conv5_0'
cnn_spatial_encoder {
}
attention_decoder {
embedding_size: 64
num_layers: 2
basic_lstm {
num_units: 512
}
attention_mechanism: "normed_bahdanau"
pass_hidden_state: false
visualize_type: "spatial"
}
}
}
}
transformer_ocr
#-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-09-19
# transformer_f1024_e12d4.config:
# Synth90k text recognition model (Transformer) training config
train_config: {
optimizer {
adam_optimizer: {
learning_rate: {
transformer_learning_rate {
learning_rate_base: 2
hidden_size: 512
warmup_steps: 8000
}
}
}
use_moving_average: false
}
# gradient_clipping_by_norm: 10.0
num_steps: 1000000
model_dir: "experiments/synth90k/output/transformer_f1024_e12d4"
save_checkpoints_steps: 2000
save_summary_steps: 100
log_step_count_steps: 100
summary_model_vars: false
}
train_data: {
input_path: "data/Synth90k_tfrecords/Synth90k_train_*.tfrecord"
batch_size: 512
shuffle: true
num_readers: 8
text_recognition_decoder_config {
char_dict_path: "data/Synth90k_tfrecords/char_dict"
}
data_augmentation_options {
resize_image {
new_height: 32
new_width: 100
}
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: -1
target_maxval: 1
}
}
}
eval_config: {
num_examples: 31232
num_visualizations : 16
}
eval_data: {
input_path: "data/Synth90k_tfrecords/Synth90k_test.tfrecord"
batch_size: 512
shuffle: false
text_recognition_decoder_config {
char_dict_path: "data/Synth90k_tfrecords/char_dict"
}
data_augmentation_options {
resize_image {
new_height: 32
new_width: 100
}
}
data_augmentation_options {
rgb_to_gray {
}
}
data_augmentation_options {
normalize_image {
original_minval: 0
original_maxval: 255
target_minval: -1
target_maxval: 1
}
}
}
export_config {
batch_size: -1
}
model_config {
model_class: 'TextRecognition'
text_recognition {
transformer_head {
input_layer: 'image'
transformer_encoder {
num_layers: 12
hidden_size: 512
num_heads: 8
filter_size: 1024
layer_postprocess_dropout: 0.1
attention_dropout: 0.1
relu_dropout: 0.1
pooling_rate: 4
}
transformer_decoder {
num_layers: 4
hidden_size: 512
num_heads: 8
filter_size: 1024
layer_postprocess_dropout: 0.1
attention_dropout: 0.1
relu_dropout: 0.1
}
}
}
}
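The transformer_learning_rate block above uses the same parameters (learning_rate_base, hidden_size, warmup_steps) as the "Noam" schedule from the original Transformer paper: linear warmup followed by inverse-square-root decay. Assuming this framework implements that standard formula, a minimal sketch:

# Sketch of the Noam/transformer learning-rate schedule, assuming the standard formula:
# lr(step) = learning_rate_base * hidden_size^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
def transformer_lr(step, learning_rate_base=2.0, hidden_size=512, warmup_steps=8000):
    step = max(step, 1)  # avoid division by zero at step 0
    return (learning_rate_base * hidden_size ** -0.5 *
            min(step ** -0.5, step * warmup_steps ** -1.5))

for step in (1000, 8000, 100000):
    print(step, transformer_lr(step))
# the rate peaks around step 8000 (~9.9e-4 with these settings), then decays as 1/sqrt(step)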
End-to-End Text Recognition¶
text_end2end_krcnn_attention¶
#-*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-03-12
# text_end2end_krcnn_attn_dis.config:
# receipt text end2end model training config
train_config: {
optimizer {
adam_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.0001
decay_steps: 50000
decay_factor: 0.5
min_learning_rate: 0.000001
}
}
}
use_moving_average: false
}
# gradient_clipping_by_norm: 0.0
fine_tune_checkpoint: "pretrained_models/general_text_end2end_krcnn_attn_resnet50/model.ckpt"
num_steps: 1000000
model_dir: "experiments/recipt_text/text_end2end_krcnn_resnet50_attn"
save_checkpoints_steps: 2000
save_summary_steps: 100
log_step_count_steps: 100
summary_model_vars: false
# for distributed training only
# sync_replicas: false
# replicas_to_aggregate: 8
# num_worker_replicas: 8
}
train_data: {
input_path: "data/recipt_text/end2end_tfrecords/train_*.tfrecord"
batch_size: 1
shuffle: true
shuffle_buffer_size: 64
prefetch_size: 64
num_readers: 8
text_end2end_decoder_config {
char_dict_path: "data/recipt_text/end2end_tfrecords/char_dict"
label_map_path: "data/recipt_text/end2end_tfrecords/label_map.pbtxt"
}
data_augmentation_options {
random_jitter_aspect_ratio {
min_jitter_coef: 0.8
max_jitter_coef: 1.2
}
}
data_augmentation_options {
random_rotation90 {
}
}
data_augmentation_options {
random_rotation {
min_angle: -10
max_angle: 10
use_keypoints_calc_boxes: true
}
}
data_augmentation_options {
random_resize_to_range {
min_sizes: 640
max_sizes: 1440
min_sizes: 800
max_sizes: 1440
min_sizes: 960
max_sizes: 1440
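# assumption: each (min_sizes, max_sizes) pair above defines one candidate resize range,
# and one pair is sampled per image for multi-scale training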
}
}
data_augmentation_options {
random_distort_color {
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
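# these are the widely used ImageNet per-channel means (R=123.68, G=116.779, B=103.939),
# subtracted from the input image; RGB channel ordering is assumed here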
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: false
}
eval_config: {
num_examples: 299
num_visualizations : 16
visualization_export_dir: ''
metrics_set: "icdar_end2end_metrics"
}
eval_data: {
input_path: "data/recipt_text/end2end_tfrecords/test.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 32
text_end2end_decoder_config {
char_dict_path: "data/recipt_text/end2end_tfrecords/char_dict"
label_map_path: "data/recipt_text/end2end_tfrecords/label_map.pbtxt"
}
# note: the augmentation order matters and must not be changed
data_augmentation_options {
random_resize_to_range {
min_sizes: 800
max_sizes: 1440
}
}
data_augmentation_options {
subtract_channel_mean {
# see https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
means: 123.68
means: 116.779
means: 103.939
}
}
use_diff: true
}
export_config {
batch_size: 1
}
model_config: {
model_class: 'TextEnd2End'
text_end2end {
backbone {
class_name: 'resnet_v1_50'
batchnorm_trainable: false
weight_decay: 0.0001
}
fpn {
input: 'resnet_v1_50/block1'
input: 'resnet_v1_50/block2'
input: 'resnet_v1_50/block3'
input: 'resnet_v1_50/block4'
fea_dim: 256
extra_conv_layers: 1
roi_min_level: 2
roi_max_level: 5
roi_canonical_scale: 168
roi_canonical_level: 4
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
rpn_head {
# if input_layer is not specified, the FPN features are used;
# they all have the "FPN/" prefix
box_predictor {
weight_shared_convolutional_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
depth: 256
num_layers_before_predictor: 1
kernel_size: 3
}
}
first_stage_minibatch_size: 256
first_stage_positive_balance_fraction: 0.5
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
rpn_min_size: 8
first_stage_anchor_generator {
multiscale_anchor_generator {
min_level: 2
max_level: 6
anchor_scale: 6
aspect_ratios: [0.2, 0.5, 1, 2, 5]
normalize_coordinates: false
scales_per_octave: 1
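# 5 aspect ratios x 1 scale per octave = 5 anchors per location on each of levels 2-6;
# assuming the usual multiscale convention, the base anchor size at level l is
# anchor_scale * 2^l, e.g. 6 * 2^2 = 24 px at level 2 and 6 * 2^6 = 384 px at level 6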
}
}
}
rcnn_head {
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
num_classes: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
num_layers_before_predictor: 2
depth: 1024
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
agnostic: true
}
}
hard_example_miner {
num_hard_examples: 128
iou_threshold: 0.99
loss_type: BOTH
}
nms_config {
score_threshold: 0.7
iou_threshold: 0.3
max_detections_per_class: 400
max_total_detections: 400
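# detections scoring below 0.7 are dropped, overlapping boxes with IoU > 0.3 are suppressed,
# and at most 400 detections are kept per class and in total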
}
second_stage_batch_size: 128
second_stage_balance_fraction: 0.25
}
keypoint_head {
keypoint_predictor {
text_resnet_keypoint_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
variance_scaling_initializer {
}
}
}
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0001
}
}
initializer {
variance_scaling_initializer {
}
}
}
}
}
initial_crop_size: 28
maxpool_kernel_size: 2
maxpool_stride: 2
num_keypoints: 4
predict_direction: true
direction_trainable: true
unified_direction: true
}
fixed_height_feature_gather {
input_layer: 'FPN/level_1'
height: 8
max_width: 300
visualize_height: 32
visualize_width: 100
num_buckets: 5
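# assumption: features for each detected text line are gathered from FPN/level_1, resized to
# a fixed height of 8 (width capped at 300), and bucketed by width before the recognition head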
}
attention_head {
crnn_encoder {
cnn_name: 'senet5_encoder'
norm_type: GROUP
weight_decay: 0.0
num_layers: 2
basic_lstm {
num_units: 512
}
encoder_type: UNI
}
attention_decoder {
embedding_size: 64
num_layers: 2
basic_lstm {
num_units: 512
}
attention_mechanism: "normed_bahdanau"
# visualize_type: "line"
}
}
}
}
SavedModel Evaluation¶
detector¶
# -*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-05-06
# detector evaluation config
predictor_name: "Detector"
model_path: "data/test/inference/rfcn"
eval_data: {
input_path: "data/voc0712_tfrecord/VOC2007_test.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 32
voc_decoder_config {
label_map_path: "data/voc0712_tfrecord/pascal_label_map.pbtxt"
}
# no data augmentation needed
use_diff: true
num_epochs: 1
}
eval_config: {
num_examples: 10
metrics_set: 'coco_detection_metrics'
metrics_set: 'pascal_voc_detection_metrics'
metrics_set: 'pascal_voc07_detection_metrics'
}
classifier¶
# -*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-04-29
# classifier evaluation config
predictor_name: "Classifier"
model_path: "data/test/inference/cifar10_resnet50"
eval_data : {
input_path: "data/cifar10/cifar10_test.tfrecord"
batch_size: 100
shuffle: false
num_readers: 1
drop_remainder: false
classification_decoder_config{
label_map_path: 'data/cifar10/labelmap.pbtxt'
}
}
eval_config: {
num_visualizations : 16
visualization_export_dir: ''
metrics_set: "classification_metrics"
}
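model_path points to an exported SavedModel directory. As a rough illustration only (this is plain TensorFlow 1.x, not this framework's own Predictor or evaluation API, and the 'serve' tag is an assumption), such a directory can be inspected like this:

# Generic TF 1.x sketch for inspecting an exported SavedModel.
import tensorflow as tf

export_dir = 'data/test/inference/cifar10_resnet50'
with tf.Session(graph=tf.Graph()) as sess:
    # load the graph and variables under the serving tag
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_dir)
    # list the available signatures and their input/output tensor names
    for name, sig in meta_graph.signature_def.items():
        print(name, list(sig.inputs), list(sig.outputs))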
multilabel_classifier¶
# -*- encoding:utf-8 -*-
# Author: wenmeng.zwm@alibaba-inc.com
# Date: 2019-09-17
# multi-label classifier evaluation config
predictor_name: "MultiLabelClassifier"
model_path: "data/test/inference/objects365_resnet101"
eval_data : {
input_path: "data/objects365_tfrecord/objects365_test*.tfrecord"
batch_size: 16
shuffle: false
num_readers: 2
classification_decoder_config {
label_map_path: "data/objects365_tfrecord/objects365_label_map.pbtxt"
is_multi_label: true
}
}
eval_config: {
metrics_set: "multi_label_classification_metrics"
include_metrics_per_category: true
}
text_detector¶
# -*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-04-29
# text detector evaluation config
predictor_name: "TextDetector"
model_path: "data/test/inference/text_krcnn"
eval_data: {
input_path: "data/icdar_detection_tfrecords/icdar-ch4-test.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 32
text_detection_decoder_config {
label_map_path: "data/icdar_detection_tfrecords/label_map.pbtxt"
}
# no data augmentation needed
use_diff: true
num_epochs: 1
}
eval_config: {
metrics_set: "icdar_detection_metrics"
}
text_recognizer¶
# -*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-04-29
# text recognizer evaluation config
predictor_name: "TextRecognizer"
model_path: "data/test/inference/crnn_attn"
eval_data: {
input_path: "data/recipt_text/recognition_tfrecords/test.tfrecord"
batch_size: 64
shuffle: false
text_recognition_decoder_config {
char_dict_path: "data/recipt_text/recognition_tfrecords/char_dict"
min_input_ratio: 0.125
max_input_ratio: 100
}
# no data augmentation needed
num_epochs: 1
}
eval_config: {
metrics_set: "text_recognition_metrics"
}
text_spotter¶
# -*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-04-29
# text spotter evaluation config
predictor_name: "TextSpotter"
model_path: "data/test/inference/text_end2end"
eval_data: {
input_path: "data/recipt_text/end2end_tfrecords/test.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 32
text_end2end_decoder_config {
char_dict_path: "data/recipt_text/end2end_tfrecords/char_dict"
label_map_path: "data/recipt_text/end2end_tfrecords/label_map.pbtxt"
}
# no data augmentation needed
use_diff: true
num_epochs: 1
}
eval_config: {
num_visualizations : 16
visualization_export_dir: ''
metrics_set: "icdar_end2end_metrics"
}
text_pipeline_predictor¶
# -*- encoding:utf-8 -*-
# Author: hongsheng.jhs@alibaba-inc.com
# Date: 2019-04-29
# text pipeline predictor evaluation config
predictor_name: "TextPipelinePredictor"
model_path: "data/test/inference/text_pipeline"
eval_data: {
input_path: "data/recipt_text/end2end_tfrecords/test.tfrecord"
batch_size: 1
shuffle: false
prefetch_size: 32
text_end2end_decoder_config {
char_dict_path: "data/recipt_text/end2end_tfrecords/char_dict"
label_map_path: "data/recipt_text/end2end_tfrecords/label_map.pbtxt"
}
# no data augmentation needed
use_diff: true
num_epochs: 1
}
eval_config: {
metrics_set: "icdar_end2end_metrics"
}