Files
OpenPCDet/tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet_2frames.yaml

317 lines
9.2 KiB
YAML
Raw Normal View History

2025-09-21 20:19:44 +08:00
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset_multiframe.yaml
SEQUENCE_CONFIG:
ENABLED: True
SAMPLE_OFFSET: [-1, 0]
TRAIN_WITH_SPEED: False
DATA_AUGMENTOR:
DISABLE_AUG_LIST: ['placeholder']
AUG_CONFIG_LIST:
- NAME: gt_sampling
USE_ROAD_PLANE: False
DB_INFO_PATH:
- waymo_processed_data_v0_5_0_waymo_dbinfos_train_sampled_1_multiframe_-4_to_0.pkl
USE_SHARED_MEMORY: False # set it to True to speed up (it costs about 50GB? shared memory)
DB_DATA_PATH:
- waymo_processed_data_v0_5_0_gt_database_train_sampled_1_multiframe_-4_to_0_global.npy
PREPARE: {
filter_by_min_points: ['Vehicle:5', 'Pedestrian:5', 'Cyclist:5'],
filter_by_difficulty: [-1],
}
SAMPLE_GROUPS: ['Vehicle:15', 'Pedestrian:10', 'Cyclist:10']
NUM_POINT_FEATURES: 6
REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
LIMIT_WHOLE_SCENE: True
FILTER_OBJ_POINTS_BY_TIMESTAMP: True
TIME_RANGE: [0.1, 0.0] # 0.1s-0.0s indicates 2 frames
- NAME: random_world_flip
ALONG_AXIS_LIST: ['x', 'y']
- NAME: random_world_rotation
WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]
- NAME: random_world_scaling
WORLD_SCALE_RANGE: [0.95, 1.05]
MODEL:
NAME: PVRCNNPlusPlus
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelResBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: CenterHead
CLASS_AGNOSTIC: False
CLASS_NAMES_EACH_HEAD: [
[ 'Vehicle', 'Pedestrian', 'Cyclist' ]
]
SHARED_CONV_CHANNEL: 64
USE_BIAS_BEFORE_NORM: True
NUM_HM_CONV: 2
SEPARATE_HEAD_CFG:
HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ]
HEAD_DICT: {
'center': { 'out_channels': 2, 'num_conv': 2 },
'center_z': { 'out_channels': 1, 'num_conv': 2 },
'dim': { 'out_channels': 3, 'num_conv': 2 },
'rot': { 'out_channels': 2, 'num_conv': 2 },
}
TARGET_ASSIGNER_CONFIG:
FEATURE_MAP_STRIDE: 8
NUM_MAX_OBJS: 500
GAUSSIAN_OVERLAP: 0.1
MIN_RADIUS: 2
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
}
POST_PROCESSING:
SCORE_THRESH: 0.1
POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ]
MAX_OBJ_PER_SAMPLE: 500
NMS_CONFIG:
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
PFE:
NAME: VoxelSetAbstraction
POINT_SOURCE: raw_points
NUM_KEYPOINTS: 4096
NUM_OUTPUT_FEATURES: 90
SAMPLE_METHOD: SPC
SPC_SAMPLING:
NUM_SECTORS: 6
SAMPLE_RADIUS_WITH_ROI: 1.6
FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points']
SA_LAYER:
raw_points:
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 3
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 32 ]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 2, 2, 2 ]
MAX_NEIGHBOR_DISTANCE: 0.2
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 32, 32 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 0.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 32, 32 ]
x_conv3:
DOWNSAMPLE_FACTOR: 4
INPUT_CHANNELS: 64
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 32
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [128]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [3, 3, 3]
MAX_NEIGHBOR_DISTANCE: 1.2
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [64, 64]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 2.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
x_conv4:
DOWNSAMPLE_FACTOR: 8
INPUT_CHANNELS: 64
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 32
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 128 ]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 2.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 4.8
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
POINT_HEAD:
NAME: PointHeadSimple
CLS_FC: [256, 256]
CLASS_AGNOSTIC: True
USE_POINT_FEATURES_BEFORE_FUSION: True
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
LOSS_CONFIG:
LOSS_REG: smooth-l1
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
}
ROI_HEAD:
NAME: PVRCNNHead
CLASS_AGNOSTIC: True
SHARED_FC: [256, 256]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.3
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 1024
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.7
SCORE_THRESH: 0.1
# NMS_PRE_MAXSIZE: 4096
# NMS_POST_MAXSIZE: 500
# NMS_THRESH: 0.85
ROI_GRID_POOL:
GRID_SIZE: 6
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: voxel_random_choice
NUM_REDUCED_CHANNELS: 30
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 128 ]
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 0.8
NEIGHBOR_NSAMPLE: 32
POST_MLPS: [ 64, 64 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 1.6
NEIGHBOR_NSAMPLE: 32
POST_MLPS: [ 64, 64 ]
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: waymo
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 2
NUM_EPOCHS: 36
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.001
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10