A Detailed Walkthrough of the train.py Training Code in SGPN

Train.py
# see issue #152
import argparse
import tensorflow as tf
import numpy as np
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
import sys
BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
print(BASE_DIR)
sys.path.append(BASE_DIR)
sys.path.append(os.path.dirname(BASE_DIR))
sys.path.append(os.path.join(BASE_DIR, 'utils'))
sys.path.append(os.path.join(BASE_DIR, 'models'))
import provider
import model

The path setup has been modified here. The original repository code

sys.path.append(os.path.join(BASE_DIR, '../../'))
sys.path.append(os.path.join(BASE_DIR, '../../utils'))
sys.path.append(os.path.join(BASE_DIR, '../../models'))
import provider
from models import model

was changed to:

sys.path.append(os.path.join(BASE_DIR, 'utils'))
sys.path.append(os.path.join(BASE_DIR, 'models'))
import provider
import model

# Parsing Arguments
parser = argparse.ArgumentParser()
# Experiment Settings
parser.add_argument('--gpu', type=str, default="1", help='GPU to use [default: GPU 1]')
parser.add_argument('--wd', type=float, default=0.9, help='Weight Decay [Default: 0.0]')
parser.add_argument('--epoch', type=int, default=200, help='Number of epochs [default: 50]')
parser.add_argument('--batch', type=int, default=4, help='Batch Size during training [default: 4]')
parser.add_argument('--point_num', type=int, default=4096, help='Point Number')
parser.add_argument('--group_num', type=int, default=50, help='Maximum Group Number in one pc')
parser.add_argument('--cate_num', type=int, default=13, help='Number of categories')
parser.add_argument('--margin_same', type=float, default=10., help='Double hinge loss margin: same semantic')
parser.add_argument('--margin_diff', type=float, default=80., help='Double hinge loss margin: different semantic')

These arguments set the experiment's hyperparameters:

wd = 0.9 (weight decay)
epoch = 200 (number of training epochs)
batch = 4 (batch size during training)
point_num = 4096 (number of points per point cloud)
group_num = 50 (maximum number of groups in one point cloud; "pc" in the help string means point cloud)
cate_num = 13 (number of semantic categories)
margin_same = 10 (double hinge loss margin: same semantic category)
margin_diff = 80 (double hinge loss margin: different semantic category)

# Input&Output Settings
parser.add_argument('--output_dir', type=str, default='checkpoint/stanford_sem_seg', help='Directory that stores all training logs and trained models')
parser.add_argument('--input_list', type=str, default='data/train_hdf5_file_list.txt', help='Input data list file')
parser.add_argument('--restore_model', type=str, default='checkpoint/stanford_ins_seg', help='Pretrained model')
FLAGS = parser.parse_args(args=[])

The input/output arguments:

output_dir (directory that stores all training logs and trained models)
input_list (input data list file)
restore_model (pretrained model)
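A side note on how these flags behave: parse_args(args=[]) ignores sys.argv and falls back to the defaults for every flag, which is handy in a notebook. A minimal self-contained sketch (the reduced parser below is a stand-in for the full one above, not the script's actual parser):

import argparse

demo = argparse.ArgumentParser()
demo.add_argument('--gpu', type=str, default="1")
demo.add_argument('--batch', type=int, default=4)

# args=[] -> every flag falls back to its default
flags = demo.parse_args(args=[])
print(flags.gpu, flags.batch)                # 1 4

# an explicit list overrides the defaults, as the command line would
flags = demo.parse_args(['--gpu', '0', '--batch', '8'])
print(flags.gpu, flags.batch)                # 0 8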
os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu
TRAINING_FILE_LIST = FLAGS.input_list
PRETRAINED_MODEL_PATH = os.path.join(FLAGS.restore_model, 'trained_models/')
POINT_NUM = FLAGS.point_num
BATCH_SIZE = FLAGS.batch
OUTPUT_DIR = FLAGS.output_dir
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
NUM_GROUPS = FLAGS.group_num
NUM_CATEGORY = FLAGS.cate_num

PRETRAINED_MODEL_PATH points at the pretrained model (inside its trained_models folder). If the OUTPUT_DIR path (the stanford_sem_seg folder) does not exist, it is created.

print('#### Batch Size: {0}'.format(BATCH_SIZE))
print('#### Point Number: {0}'.format(POINT_NUM))
print('#### Training using GPU: {0}'.format(FLAGS.gpu))

DECAY_STEP = 800000.
DECAY_RATE = 0.5
LEARNING_RATE_CLIP = 1e-6
BASE_LEARNING_RATE = 1e-4
MOMENTUM = 0.9

These constants configure the gradient-descent schedule.

TRAINING_EPOCHES = FLAGS.epoch
MARGINS = [FLAGS.margin_same, FLAGS.margin_diff]
print('### Training epoch: {0}'.format(TRAINING_EPOCHES))

MODEL_STORAGE_PATH = os.path.join(OUTPUT_DIR, 'trained_models')
if not os.path.exists(MODEL_STORAGE_PATH):
    os.mkdir(MODEL_STORAGE_PATH)
LOG_STORAGE_PATH = os.path.join(OUTPUT_DIR, 'logs')
if not os.path.exists(LOG_STORAGE_PATH):
    os.mkdir(LOG_STORAGE_PATH)
SUMMARIES_FOLDER = os.path.join(OUTPUT_DIR, 'summaries')
if not os.path.exists(SUMMARIES_FOLDER):
    os.mkdir(SUMMARIES_FOLDER)

This checks whether the trained_models, logs, and summaries folders exist under stanford_sem_seg, and creates any that are missing.

LOG_DIR = FLAGS.output_dir
if not os.path.exists(LOG_DIR):
    os.mkdir(LOG_DIR)
os.system('cp %s %s' % (os.path.join(BASE_DIR, 'models/model.py'), LOG_DIR))  # bkp of model def
os.system('cp %s %s' % (os.path.join(BASE_DIR, 'train.py'), LOG_DIR))         # bkp of train procedure

These two cp commands back up model.py (whose functions train.py calls) and train.py itself into the log directory, so the exact code used for a run is preserved alongside its logs.
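The exists-then-mkdir pattern above works; as an aside (not part of the original script), os.makedirs with exist_ok=True collapses each check-and-create pair into one call:

import os

OUTPUT_DIR = 'checkpoint/stanford_sem_seg'
for sub in ('trained_models', 'logs', 'summaries'):
    # creates the directory (and any missing parents); no error if it already exists
    os.makedirs(os.path.join(OUTPUT_DIR, sub), exist_ok=True)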
def printout(flog, data):
    print(data)
    flog.write(data + '\n')

printout writes data both to standard output and to the log file handle flog; it is the logging helper used throughout the rest of the script.

def train():
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(FLAGS.gpu)):
            batch = tf.Variable(0, trainable=False, name='batch')
            learning_rate = tf.train.exponential_decay(
                BASE_LEARNING_RATE,  # base learning rate
                batch * BATCH_SIZE,  # global step: number of samples processed so far
                DECAY_STEP,          # step size
                DECAY_RATE,          # decay rate
                staircase=True)      # stair-case rather than continuous decay
            learning_rate = tf.maximum(learning_rate, LEARNING_RATE_CLIP)

This computes an exponentially decaying learning rate; decaying the learning rate over the course of training usually helps convergence. tf.train.exponential_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None) implements the decay, with BASE_LEARNING_RATE = 1e-4, BATCH_SIZE = 4, DECAY_STEP = 800000, DECAY_RATE = 0.5. The formula is:

decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)

where here global_step = batch * BATCH_SIZE. The final tf.maximum clips the rate so it never falls below LEARNING_RATE_CLIP = 1e-6.
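Plugging the numbers in (a quick plain-Python check, independent of TensorFlow): with staircase=True the exponent global_step / decay_steps is truncated to an integer, so the rate halves every 800000 samples, i.e. every 200000 batches of size 4:

def decayed_lr(global_step,
               base_lr=1e-4, decay_rate=0.5, decay_steps=800000.0, clip=1e-6):
    # staircase=True truncates the exponent to an integer
    lr = base_lr * decay_rate ** int(global_step / decay_steps)
    return max(lr, clip)

print(decayed_lr(0))           # 0.0001
print(decayed_lr(800000))      # 5e-05   (after 200000 batches of size 4)
print(decayed_lr(1600000))     # 2.5e-05
print(decayed_lr(64000000))    # 1e-06   (clipped by LEARNING_RATE_CLIP)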
lr_op = tf.summary.scalar('learning_rate', learning_rate)

This records learning_rate as a scalar summary so it can be visualized in TensorBoard.

pointclouds_ph, ptsseglabel_ph, ptsgroup_label_ph, pts_seglabel_mask_ph, pts_group_mask_ph, alpha_ph = \
    model.placeholder_inputs(BATCH_SIZE, POINT_NUM, NUM_GROUPS, NUM_CATEGORY)

placeholder_inputs here is the function defined in model.py:

def placeholder_inputs(batch_size, num_point, num_group, num_cate):
    if num_point == 0:
        pointclouds_ph = tf.placeholder(tf.float32, shape=(batch_size, None, 9))
    else:
        pointclouds_ph = tf.placeholder(tf.float32, shape=(batch_size, num_point, 9))
    pts_seglabels_ph = tf.placeholder(tf.int32, shape=(batch_size, num_point, num_cate))
    pts_grouplabels_ph = tf.placeholder(tf.float32, shape=(batch_size, num_point, num_group))
    pts_seglabel_mask_ph = tf.placeholder(tf.float32, shape=(batch_size, num_point))
    pts_group_mask_ph = tf.placeholder(tf.float32, shape=(batch_size, num_point))
    alpha_ph = tf.placeholder(tf.float32, shape=())
    return pointclouds_ph, pts_seglabels_ph, pts_grouplabels_ph, \
        pts_seglabel_mask_ph, pts_group_mask_ph, alpha_ph

The placeholder shapes are:

pointclouds_ph: (batch_size, num_point, 9)
ptsseglabel_ph: (batch_size, num_point, num_cate)
ptsgroup_label_ph: (batch_size, num_point, num_group)
pts_seglabel_mask_ph: (batch_size, num_point)
pts_group_mask_ph: (batch_size, num_point)
alpha_ph: shape=(), i.e. a scalar (0-d) placeholder, which accepts a single float.

Note: the names ptsseglabel_ph and ptsgroup_label_ph used in train.py correspond to pts_seglabels_ph and pts_grouplabels_ph in model.py; only the variable names differ.
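To make the shapes concrete, here is a minimal TF 1.x sketch of feeding placeholders with these shapes (pc_ph, seg_ph, group_ph, mask_ph are illustrative stand-in names, and the data is made-up; batch_size=4, num_point=4096, num_group=50, num_cate=13 as in the flags):

import numpy as np
import tensorflow as tf

B, N, G, C = 4, 4096, 50, 13
pc_ph    = tf.placeholder(tf.float32, (B, N, 9))
seg_ph   = tf.placeholder(tf.int32,   (B, N, C))
group_ph = tf.placeholder(tf.float32, (B, N, G))
mask_ph  = tf.placeholder(tf.float32, (B, N))
alpha_ph = tf.placeholder(tf.float32, ())

feed_dict = {
    pc_ph:    np.random.rand(B, N, 9).astype(np.float32),  # XYZ, RGB, normalized XYZ
    seg_ph:   np.zeros((B, N, C), np.int32),               # one-hot semantic labels
    group_ph: np.zeros((B, N, G), np.float32),             # one-hot group labels
    mask_ph:  np.ones((B, N), np.float32),                 # 1 where a label is valid
    alpha_ph: 10.0,                                        # a single float fits shape=()
}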
is_training_ph = tf.placeholder(tf.bool, shape=())

is_training_ph is likewise a scalar (boolean) placeholder.

labels = {'ptsgroup': ptsgroup_label_ph,
          'semseg': ptsseglabel_ph,
          'semseg_mask': pts_seglabel_mask_ph,
          'group_mask': pts_group_mask_ph}

labels is a dictionary; for example, 'ptsgroup' is a key and ptsgroup_label_ph its value.

net_output = model.get_model(pointclouds_ph, is_training_ph, group_cate_num=NUM_CATEGORY, m=MARGINS[0])
loss, grouperr, same, same_cnt, diff, diff_cnt, pos, pos_cnt = model.get_loss(net_output, labels, alpha_ph, MARGINS)

is_training_ph is the scalar placeholder defined above; group_cate_num = NUM_CATEGORY = cate_num = 13, and m = MARGINS[0] = margin_same. get_model is the function defined in model.py:

def get_model(point_cloud, is_training, group_cate_num=50, m=10., bn_decay=None):
    # input: point_cloud: BxNx9 (XYZ, RGB, NormalizedXYZ)
    batch_size = point_cloud.get_shape()[0].value
    print(point_cloud.get_shape())
    F = pointnet.get_model(point_cloud, is_training, bn=True, bn_decay=bn_decay)

    # Semantic prediction
    Fsem = tf_util.conv2d(F, 128, [1, 1], padding='VALID', stride=[1, 1], bn=False,
                          is_training=is_training, scope='Fsem')
    ptssemseg_logits = tf_util.conv2d(Fsem, group_cate_num, [1, 1], padding='VALID', stride=[1, 1],
                                      activation_fn=None, scope='ptssemseg_logits')
    ptssemseg_logits = tf.squeeze(ptssemseg_logits, [2])
    ptssemseg = tf.nn.softmax(ptssemseg_logits, name="ptssemseg")

    # Similarity matrix
    Fsim = tf_util.conv2d(F, 128, [1, 1], padding='VALID', stride=[1, 1], bn=False,
                          is_training=is_training, scope='Fsim')
    Fsim = tf.squeeze(Fsim, [2])
    r = tf.reduce_sum(Fsim * Fsim, 2)
    r = tf.reshape(r, [batch_size, -1, 1])
    print(r.get_shape(), Fsim.get_shape())
    D = r - 2 * tf.matmul(Fsim, tf.transpose(Fsim, perm=[0, 2, 1])) + tf.transpose(r, perm=[0, 2, 1])
    # simmat_logits = tf.maximum(D, 0.)
    simmat_logits = tf.maximum(m * D, 0.)

    # Confidence Map
    Fconf = tf_util.conv2d(F, 128, [1, 1], padding='VALID', stride=[1, 1], bn=False,
                           is_training=is_training, scope='Fsconf')
    conf_logits = tf_util.conv2d(Fconf, 1, [1, 1], padding='VALID', stride=[1, 1],
                                 activation_fn=None, scope='conf_logits')
    conf_logits = tf.squeeze(conf_logits, [2])
    conf = tf.nn.sigmoid(conf_logits, name="confidence")

    return {'semseg': ptssemseg,
            'semseg_logits': ptssemseg_logits,
            'simmat': simmat_logits,
            'conf': conf,
            'conf_logits': conf_logits}

Input: point_cloud is BxNx9 (XYZ, RGB, normalized XYZ). .get_shape()[i].value reads the i-th dimension of a TensorShape directly; for example, if a has shape (2, 3, 4), then a.get_shape()[1].value = 3. Here point_cloud.get_shape()[0].value extracts the first dimension of point_cloud, the batch size B. In the similarity branch, D is the matrix of pairwise squared Euclidean distances between the per-point feature vectors in Fsim, built from the identity ||x_i - x_j||^2 = ||x_i||^2 - 2 x_i·x_j + ||x_j||^2.
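A small NumPy sketch (illustrative only, with random data) confirming that the r - 2*X@X.T + r.T construction equals the pairwise squared-distance matrix:

import numpy as np

X = np.random.rand(5, 128)                # 5 points, 128-d features (one batch element of Fsim)
r = np.sum(X * X, axis=1, keepdims=True)  # (5, 1) squared norms
D = r - 2 * X @ X.T + r.T                 # (5, 5) pairwise squared distances

# brute-force check against the definition ||x_i - x_j||^2
D_ref = np.array([[np.sum((xi - xj) ** 2) for xj in X] for xi in X])
assert np.allclose(D, D_ref)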
pointnet.get_model, which produces the shared per-point feature map F, is defined as follows:

def get_model(point_cloud, is_training, bn=True, bn_decay=None):
    """ ConvNet baseline, input is BxNx9 gray image """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    input_image = tf.expand_dims(point_cloud, -1)
    # CONV
    net = tf_util.conv2d(input_image, 64, [1, 9], padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training, scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1, 1], padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training, scope='conv2', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1, 1], padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training, scope='conv3', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1], padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training, scope='conv4', bn_decay=bn_decay)
    points_feat1 = tf_util.conv2d(net, 1024, [1, 1], padding='VALID', stride=[1, 1],
                                  bn=bn, is_training=is_training, scope='conv5', bn_decay=bn_decay)
    # MAX
    pc_feat1 = tf_util.max_pool2d(points_feat1, [num_point, 1], padding='VALID', scope='maxpool1')
    # FC
    pc_feat1 = tf.reshape(pc_feat1, [batch_size, -1])
    pc_feat1 = tf_util.fully_connected(pc_feat1, 256, bn=bn, is_training=is_training, scope='fc1', bn_decay=bn_decay)
    pc_feat1 = tf_util.fully_connected(pc_feat1, 128, bn=bn, is_training=is_training, scope='fc2', bn_decay=bn_decay)
    print(pc_feat1)
    # CONCAT
    pc_feat1_expand = tf.tile(tf.reshape(pc_feat1, [batch_size, 1, 1, -1]), [1, num_point, 1, 1])
    points_feat1_concat = tf.concat(axis=3, values=[points_feat1, pc_feat1_expand])
    # CONV
    net = tf_util.conv2d(points_feat1_concat, 512, [1, 1], padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training, scope='conv6')
    net = tf_util.conv2d(net, 256, [1, 1], padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training, scope='conv7')
    # net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='dp1')
    # net = tf_util.conv2d(net, 13, [1, 1], padding='VALID', stride=[1, 1],
    #                      activation_fn=None, scope='conv8')
    # net = tf.squeeze(net, [2])
    return net

The signature of tf_util.conv2d is

def conv2d(inputs, num_output_channels, kernel_size, scope, stride=[1, 1], padding='SAME',
           use_xavier=True, stddev=1e-3, weight_decay=0.0, activation_fn=tf.nn.relu, bn=False,
           bn_decay=None, is_training=None):

whose parameters match a call such as tf_util.conv2d(F, 128, [1, 1], padding='VALID', stride=[1, 1], bn=False, is_training=is_training, scope='Fsem') one-to-one. The CONCAT step tiles the 128-d global feature pc_feat1 across all num_point positions and concatenates it with the 1024-d per-point features, so every point sees both local and global context (see the sketch below).
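A NumPy sketch (shapes only, with zero-filled stand-in tensors) of that tile-and-concatenate fusion, assuming batch_size=4 and num_point=4096:

import numpy as np

B, N = 4, 4096
points_feat1 = np.zeros((B, N, 1, 1024), np.float32)  # per-point features from conv5
pc_feat1     = np.zeros((B, 128), np.float32)         # global feature from fc2

# tile the global feature so each of the N points receives a copy
pc_feat1_expand = np.tile(pc_feat1.reshape(B, 1, 1, 128), (1, N, 1, 1))
fused = np.concatenate([points_feat1, pc_feat1_expand], axis=3)
print(fused.shape)   # (4, 4096, 1, 1152) -> fed into conv6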
get_loss is likewise defined in model.py:

def get_loss(net_output, labels, alpha=10., margin=[1., 2.]):
    """
    input:
        net_output: {'semseg', 'semseg_logits', 'simmat', 'conf', 'conf_logits'}
        labels: {'ptsgroup', 'semseg', 'semseg_mask', 'group_mask'}
    """
    pts_group_label = labels['ptsgroup']
    pts_semseg_label = labels['semseg']
    group_mask = tf.expand_dims(labels['group_mask'], dim=2)

    pred_confidence_logits = net_output['conf']
    pred_simmat = net_output['simmat']

    # Similarity Matrix loss
    B = pts_group_label.get_shape()[0]
    N = pts_group_label.get_shape()[1]

    onediag = tf.ones([B, N], tf.float32)

    group_mat_label = tf.matmul(pts_group_label, tf.transpose(pts_group_label, perm=[0, 2, 1]))  # BxNxN: (i,j) = 1 if i and j are in the same group
    group_mat_label = tf.matrix_set_diag(group_mat_label, onediag)

    sem_mat_label = tf.cast(tf.matmul(pts_semseg_label, tf.transpose(pts_semseg_label, perm=[0, 2, 1])), tf.float32)  # BxNxN: (i,j) = 1 if i and j have the same semantic category
    sem_mat_label = tf.matrix_set_diag(sem_mat_label, onediag)

    samesem_mat_label = sem_mat_label
    diffsem_mat_label = tf.subtract(1.0, sem_mat_label)

    samegroup_mat_label = group_mat_label
    diffgroup_mat_label = tf.subtract(1.0, group_mat_label)
    diffgroup_samesem_mat_label = tf.multiply(diffgroup_mat_label, samesem_mat_label)
    diffgroup_diffsem_mat_label = tf.multiply(diffgroup_mat_label, diffsem_mat_label)

    num_samegroup = tf.reduce_sum(samegroup_mat_label)
    num_diffgroup_samesem = tf.reduce_sum(diffgroup_samesem_mat_label)
    num_diffgroup_diffsem = tf.reduce_sum(diffgroup_diffsem_mat_label)

    # Double hinge loss
    C_same = tf.constant(margin[0], name="C_same")  # same semantic category
    C_diff = tf.constant(margin[1], name="C_diff")  # different semantic category

    pos = tf.multiply(samegroup_mat_label, pred_simmat)  # minimize distances if in the same group
    neg_samesem = alpha * tf.multiply(diffgroup_samesem_mat_label, tf.maximum(tf.subtract(C_same, pred_simmat), 0))
    neg_diffsem = tf.multiply(diffgroup_diffsem_mat_label, tf.maximum(tf.subtract(C_diff, pred_simmat), 0))

    simmat_loss = neg_samesem + neg_diffsem + pos
    group_mask_weight = tf.matmul(group_mask, tf.transpose(group_mask, perm=[0, 2, 1]))
    # simmat_loss = tf.add(simmat_loss, pos)
    simmat_loss = tf.multiply(simmat_loss, group_mask_weight)
    simmat_loss = tf.reduce_mean(simmat_loss)

Because pts_group_label and pts_semseg_label are one-hot along their last axis, multiplying each with its own transpose yields BxNxN indicator matrices: group_mat_label(i, j) = 1 exactly when points i and j belong to the same group, and sem_mat_label(i, j) = 1 exactly when they share a semantic category. The double hinge loss then pulls the predicted pair distances pred_simmat toward zero for same-group pairs (pos), and pushes different-group pairs apart: up to margin C_same = margin_same = 10 when the pair shares a semantic category (weighted by alpha), or up to C_diff = margin_diff = 80 when it does not. Finally, group_mask_weight zeroes out pairs involving points with invalid group labels before the mean is taken.
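A tiny NumPy sketch (illustrative, one batch element with 3 points and made-up distances) of how the one-hot labels become pair indicator matrices and how the three hinge terms partition the pairs:

import numpy as np

# 3 points: two chair instances (same class, different groups) and one table point
group_label = np.array([[1, 0, 0],
                        [0, 1, 0],
                        [0, 0, 1]], np.float32)   # N x num_group, one-hot
sem_label   = np.array([[1, 0],
                        [1, 0],
                        [0, 1]], np.float32)      # N x num_cate, one-hot

group_mat = group_label @ group_label.T           # (i,j) = 1 iff same group
sem_mat   = sem_label @ sem_label.T               # (i,j) = 1 iff same semantic class

C_same, C_diff, alpha = 10.0, 80.0, 10.0
pred_simmat = np.random.rand(3, 3) * 100          # stand-in for predicted pair distances

pos         = group_mat * pred_simmat                               # same group: pull together
neg_samesem = alpha * (1 - group_mat) * sem_mat \
              * np.maximum(C_same - pred_simmat, 0)                 # diff group, same class
neg_diffsem = (1 - group_mat) * (1 - sem_mat) \
              * np.maximum(C_diff - pred_simmat, 0)                 # diff group, diff class
print((pos + neg_samesem + neg_diffsem).mean())                     # simmat_loss before masking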