diff --git a/tools/cfgs/nuscenes_models/bevfusion.yaml b/tools/cfgs/nuscenes_models/bevfusion.yaml new file mode 100644 index 0000000..b84cfd9 --- /dev/null +++ b/tools/cfgs/nuscenes_models/bevfusion.yaml @@ -0,0 +1,208 @@ +CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer', + 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml + POINT_CLOUD_RANGE: [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0] + CAMERA_CONFIG: + USE_CAMERA: True + IMAGE: + FINAL_DIM: [256,704] + RESIZE_LIM_TRAIN: [0.38, 0.55] + RESIZE_LIM_TEST: [0.48, 0.48] + + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x', 'y'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.9, 1.1] + + - NAME: random_world_translation + NOISE_TRANSLATE_STD: [0.5, 0.5, 0.5] + + - NAME: imgaug + ROT_LIM: [-5.4, 5.4] + RAND_FLIP: True + + DATA_PROCESSOR: + - NAME: mask_points_and_boxes_outside_range + REMOVE_OUTSIDE_BOXES: True + + - NAME: shuffle_points + SHUFFLE_ENABLED: { + 'train': True, + 'test': True + } + + - NAME: transform_points_to_voxels + VOXEL_SIZE: [0.075, 0.075, 0.2] + MAX_POINTS_PER_VOXEL: 10 + MAX_NUMBER_OF_VOXELS: { + 'train': 120000, + 'test': 160000 + } + + - NAME: image_calibrate + + - NAME: image_normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + + +MODEL: + NAME: BevFusion + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelResBackBone8x + USE_BIAS: False + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + IMAGE_BACKBONE: + NAME: SwinTransformer + EMBED_DIMS: 96 + DEPTHS: [2, 2, 6, 2] + NUM_HEADS: [3, 6, 12, 24] + WINDOW_SIZE: 7 + MLP_RATIO: 4 + DROP_RATE: 0. + ATTN_DROP_RATE: 0. + DROP_PATH_RATE: 0.2 + PATCH_NORM: True + OUT_INDICES: [1, 2, 3] + WITH_CP: False + CONVERT_WEIGHTS: True + INIT_CFG: + type: Pretrained + checkpoint: swint-nuimages-pretrained.pth + + NECK: + NAME: GeneralizedLSSFPN + IN_CHANNELS: [192, 384, 768] + OUT_CHANNELS: 256 + START_LEVEL: 0 + END_LEVEL: -1 + NUM_OUTS: 3 + + VTRANSFORM: + NAME: DepthLSSTransform + IMAGE_SIZE: [256, 704] + IN_CHANNEL: 256 + OUT_CHANNEL: 80 + FEATURE_SIZE: [32, 88] + XBOUND: [-54.0, 54.0, 0.3] + YBOUND: [-54.0, 54.0, 0.3] + ZBOUND: [-10.0, 10.0, 20.0] + DBOUND: [1.0, 60.0, 0.5] + DOWNSAMPLE: 2 + + FUSER: + NAME: ConvFuser + IN_CHANNEL: 336 + OUT_CHANNEL: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + USE_CONV_FOR_NO_STRIDE: True + + + DENSE_HEAD: + CLASS_AGNOSTIC: False + NAME: TransFusionHead + + USE_BIAS_BEFORE_NORM: False + + NUM_PROPOSALS: 200 + HIDDEN_CHANNEL: 128 + NUM_CLASSES: 10 + NUM_HEADS: 8 + NMS_KERNEL_SIZE: 3 + FFN_CHANNEL: 256 + DROPOUT: 0.1 + BN_MOMENTUM: 0.1 + ACTIVATION: relu + + NUM_HM_CONV: 2 + SEPARATE_HEAD_CFG: + HEAD_ORDER: ['center', 'height', 'dim', 'rot', 'vel'] + HEAD_DICT: { + 'center': {'out_channels': 2, 'num_conv': 2}, + 'height': {'out_channels': 1, 'num_conv': 2}, + 'dim': {'out_channels': 3, 'num_conv': 2}, + 'rot': {'out_channels': 2, 'num_conv': 2}, + 'vel': {'out_channels': 2, 'num_conv': 2}, + } + + TARGET_ASSIGNER_CONFIG: + FEATURE_MAP_STRIDE: 8 + DATASET: nuScenes + GAUSSIAN_OVERLAP: 0.1 + MIN_RADIUS: 2 + HUNGARIAN_ASSIGNER: + cls_cost: {'gamma': 2.0, 'alpha': 0.25, 'weight': 0.15} + reg_cost: {'weight': 0.25} + iou_cost: {'weight': 0.25} + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'bbox_weight': 0.25, + 'hm_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2] + } + LOSS_CLS: + use_sigmoid: True + gamma: 2.0 + alpha: 0.25 + + POST_PROCESSING: + SCORE_THRESH: 0.0 + POST_CENTER_RANGE: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0] + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 3 + NUM_EPOCHS: 6 + + OPTIMIZER: adam_cosineanneal + LR: 0.0001 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + BETAS: [0.9, 0.999] + + MOMS: [0.9, 0.8052631] + PCT_START: 0.4 + WARMUP_ITER: 500 + + DECAY_STEP_LIST: [35, 45] + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 35 + + LOSS_SCALE_FP16: 32 + \ No newline at end of file