diff --git a/tools/cfgs/kitti_models/voxel_rcnn_car.yaml b/tools/cfgs/kitti_models/voxel_rcnn_car.yaml new file mode 100644 index 0000000..a44436e --- /dev/null +++ b/tools/cfgs/kitti_models/voxel_rcnn_car.yaml @@ -0,0 +1,202 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: VoxelRCNN + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [64, 128] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [128, 128] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + ROI_HEAD: + NAME: VoxelRCNNHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + USE_FAST_NMS: False + SCORE_THRESH: 0.0 + NMS_PRE_MAXSIZE: 2048 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4'] + PRE_MLP: True + GRID_SIZE: 6 + POOL_LAYERS: + x_conv2: + MLPS: [[32, 32]] + QUERY_RANGES: [[4, 4, 4]] + POOL_RADIUS: [0.4] + NSAMPLE: [16] + POOL_METHOD: max_pool + x_conv3: + MLPS: [[32, 32]] + QUERY_RANGES: [[4, 4, 4]] + POOL_RADIUS: [0.8] + NSAMPLE: [16] + POOL_METHOD: max_pool + x_conv4: + MLPS: [[32, 32]] + QUERY_RANGES: [[4, 4, 4]] + POOL_RADIUS: [1.6] + NSAMPLE: [16] + POOL_METHOD: max_pool + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + GRID_3D_IOU_LOSS: False + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'rcnn_iou3d_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.3 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10