From 10fc32242e5318e7cae576d36c1cdc96f1e92595 Mon Sep 17 00:00:00 2001
From: inter
Date: Sun, 21 Sep 2025 20:19:22 +0800
Subject: [PATCH] Add File

---
 pcdet/ops/iou3d_nms/src/iou3d_nms.cpp | 235 ++++++++++++++++++++++++++
 1 file changed, 235 insertions(+)
 create mode 100644 pcdet/ops/iou3d_nms/src/iou3d_nms.cpp

diff --git a/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp b/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp
new file mode 100644
index 0000000..179a26c
--- /dev/null
+++ b/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp
@@ -0,0 +1,235 @@
+/*
+3D IoU calculation and rotated NMS (modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+#include <vector>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include "iou3d_nms.h"
+
+#define CHECK_CUDA(x) do { \
+  if (!x.type().is_cuda()) { \
+    fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \
+    exit(-1); \
+  } \
+} while (0)
+#define CHECK_CONTIGUOUS(x) do { \
+  if (!x.is_contiguous()) { \
+    fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \
+    exit(-1); \
+  } \
+} while (0)
+#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+
+// Ceiling division: number of blocks needed to cover m items, n per block.
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+
+#define CHECK_ERROR(ans) { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
+{
+  if (code != cudaSuccess)
+  {
+    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+    if (abort) exit(code);
+  }
+}
+
+// 64 boxes per NMS block: one bit per box in an unsigned long long.
+const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
+
+// CUDA kernel launchers, defined in the companion .cu file.
+void boxesalignedoverlapLauncher(const int num_box, const float *boxes_a, const float *boxes_b, float *ans_overlap);
+void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap);
+void PairedBoxesOverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap);
+void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou);
+void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh);
+void nmsNormalLauncher(const float *boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh);
+
+
+int boxes_aligned_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){
+    // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
+    // params boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading]
+    // params ans_overlap: (N, 1)
+
+    CHECK_INPUT(boxes_a);
+    CHECK_INPUT(boxes_b);
+    CHECK_INPUT(ans_overlap);
+
+    int num_box = boxes_a.size(0);
+    int num_b = boxes_b.size(0);
+
+    assert(num_box == num_b);
+
+    const float *boxes_a_data = boxes_a.data<float>();
+    const float *boxes_b_data = boxes_b.data<float>();
+    float *ans_overlap_data = ans_overlap.data<float>();
+
+    boxesalignedoverlapLauncher(num_box, boxes_a_data, boxes_b_data, ans_overlap_data);
+
+    return 1;
+}
+
+int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){
+    // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
+    // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
+    // params ans_overlap: (N, M)
+
+    CHECK_INPUT(boxes_a);
+    CHECK_INPUT(boxes_b);
+    CHECK_INPUT(ans_overlap);
+
+    int num_a = boxes_a.size(0);
+    int num_b = boxes_b.size(0);
+
+    const float *boxes_a_data = boxes_a.data<float>();
+    const float *boxes_b_data = boxes_b.data<float>();
+    float *ans_overlap_data = ans_overlap.data<float>();
+
+    boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_overlap_data);
+
+    return 1;
+}
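+
+// Illustrative usage sketch, not part of the original API: how a caller
+// might allocate the (N, M) output and invoke boxes_overlap_bev_gpu from
+// C++. The helper name is hypothetical, and it assumes the ATen headers
+// pulled in via iou3d_nms.h are sufficient for at::zeros.
+static at::Tensor boxes_overlap_bev_example(at::Tensor boxes_a, at::Tensor boxes_b){
+    // One BEV overlap area per (a, b) pair, same dtype/device as the inputs.
+    at::Tensor ans_overlap = at::zeros({boxes_a.size(0), boxes_b.size(0)}, boxes_a.options());
+    boxes_overlap_bev_gpu(boxes_a, boxes_b, ans_overlap);
+    return ans_overlap;
+}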
+
+int paired_boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){
+    // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
+    // params boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading]
+    // params ans_overlap: (N, 1)
+
+    CHECK_INPUT(boxes_a);
+    CHECK_INPUT(boxes_b);
+    CHECK_INPUT(ans_overlap);
+
+    int num_a = boxes_a.size(0);
+    int num_b = boxes_b.size(0);
+
+    assert(num_a == num_b);
+
+    const float *boxes_a_data = boxes_a.data<float>();
+    const float *boxes_b_data = boxes_b.data<float>();
+    float *ans_overlap_data = ans_overlap.data<float>();
+
+    PairedBoxesOverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_overlap_data);
+
+    return 1;
+}
+
+int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou){
+    // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
+    // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
+    // params ans_iou: (N, M)
+    CHECK_INPUT(boxes_a);
+    CHECK_INPUT(boxes_b);
+    CHECK_INPUT(ans_iou);
+
+    int num_a = boxes_a.size(0);
+    int num_b = boxes_b.size(0);
+
+    const float *boxes_a_data = boxes_a.data<float>();
+    const float *boxes_b_data = boxes_b.data<float>();
+    float *ans_iou_data = ans_iou.data<float>();
+
+    boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data);
+
+    return 1;
+}
+
+int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
+    // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading], sorted by score
+    // params keep: (N)
+    CHECK_INPUT(boxes);
+    CHECK_CONTIGUOUS(keep);
+
+    int boxes_num = boxes.size(0);
+    const float *boxes_data = boxes.data<float>();
+    long *keep_data = keep.data<long>();
+
+    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+
+    // The kernel fills one row of col_blocks 64-bit words per box: bit j of
+    // word k in row i is set when boxes i and k*64+j overlap above threshold.
+    unsigned long long *mask_data = NULL;
+    CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
+    nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
+
+    std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
+    CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
+                           cudaMemcpyDeviceToHost));
+
+    cudaFree(mask_data);
+
+    std::vector<unsigned long long> remv_cpu(col_blocks, 0);
+
+    // Greedy pass in the given (score) order: keep box i unless an earlier
+    // kept box suppressed it, then OR its mask row into the suppressed set.
+    int num_to_keep = 0;
+
+    for (int i = 0; i < boxes_num; i++){
+        int nblock = i / THREADS_PER_BLOCK_NMS;
+        int inblock = i % THREADS_PER_BLOCK_NMS;
+
+        if (!(remv_cpu[nblock] & (1ULL << inblock))){
+            keep_data[num_to_keep++] = i;
+            unsigned long long *p = &mask_cpu[0] + i * col_blocks;
+            for (int j = nblock; j < col_blocks; j++){
+                remv_cpu[j] |= p[j];
+            }
+        }
+    }
+
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess) fprintf(stderr, "CUDA error in nms_gpu: %s\n", cudaGetErrorString(err));
+
+    return num_to_keep;
+}
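+
+// Illustrative sketch of the mask layout decoded above: a hypothetical
+// helper (not used by this file) that reads bit j of box i's suppression
+// row, i.e. whether the NMS kernel marked boxes i and j as overlapping.
+static inline bool is_suppressed_pair(const unsigned long long *mask,
+                                      int col_blocks, int i, int j){
+    // Row i holds col_blocks 64-bit words; box j maps to bit (j % 64) of
+    // word (j / 64), matching the nblock/inblock arithmetic in nms_gpu.
+    return (mask[i * col_blocks + j / THREADS_PER_BLOCK_NMS]
+            >> (j % THREADS_PER_BLOCK_NMS)) & 1ULL;
+}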
+
+int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
+    // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading], sorted by score
+    // params keep: (N)
+
+    CHECK_INPUT(boxes);
+    CHECK_CONTIGUOUS(keep);
+
+    int boxes_num = boxes.size(0);
+    const float *boxes_data = boxes.data<float>();
+    long *keep_data = keep.data<long>();
+
+    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+
+    unsigned long long *mask_data = NULL;
+    CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
+    nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
+
+    std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
+    CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
+                           cudaMemcpyDeviceToHost));
+
+    cudaFree(mask_data);
+
+    std::vector<unsigned long long> remv_cpu(col_blocks, 0);
+
+    // Same greedy decode as nms_gpu; only the overlap kernel differs
+    // (axis-aligned instead of rotated boxes).
+    int num_to_keep = 0;
+
+    for (int i = 0; i < boxes_num; i++){
+        int nblock = i / THREADS_PER_BLOCK_NMS;
+        int inblock = i % THREADS_PER_BLOCK_NMS;
+
+        if (!(remv_cpu[nblock] & (1ULL << inblock))){
+            keep_data[num_to_keep++] = i;
+            unsigned long long *p = &mask_cpu[0] + i * col_blocks;
+            for (int j = nblock; j < col_blocks; j++){
+                remv_cpu[j] |= p[j];
+            }
+        }
+    }
+
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess) fprintf(stderr, "CUDA error in nms_normal_gpu: %s\n", cudaGetErrorString(err));
+
+    return num_to_keep;
+}
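+
+// For reference, a minimal sketch of how these entry points are typically
+// exposed to Python with pybind11 (OpenPCDet keeps the bindings in a
+// separate api .cpp). Guarded by a hypothetical build flag so it stays
+// disabled unless explicitly requested.
+#ifdef IOU3D_NMS_STANDALONE_BINDINGS
+#include <torch/extension.h>
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("boxes_aligned_overlap_bev_gpu", &boxes_aligned_overlap_bev_gpu, "aligned oriented boxes overlap");
+    m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu, "oriented boxes overlap");
+    m.def("paired_boxes_overlap_bev_gpu", &paired_boxes_overlap_bev_gpu, "paired oriented boxes overlap");
+    m.def("boxes_iou_bev_gpu", &boxes_iou_bev_gpu, "oriented boxes iou");
+    m.def("nms_gpu", &nms_gpu, "oriented nms gpu");
+    m.def("nms_normal_gpu", &nms_normal_gpu, "nms gpu");
+}
+#endif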