From e6328c58f5169fcb659359e6fcb0838062d0dc42 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 13:24:00 -0800 Subject: [PATCH 01/16] find peaks cpp --- trt_pose/plugins/find_peaks.cpp | 141 ++++++++++++++++---------------- trt_pose/plugins/find_peaks.hpp | 22 ++++- trt_pose/plugins/plugins.cpp | 61 ++++++++++++-- trt_pose/plugins/test_all.cpp | 48 +++++++++++ 4 files changed, 189 insertions(+), 83 deletions(-) create mode 100644 trt_pose/plugins/test_all.cpp diff --git a/trt_pose/plugins/find_peaks.cpp b/trt_pose/plugins/find_peaks.cpp index b732d09..7a09b17 100644 --- a/trt_pose/plugins/find_peaks.cpp +++ b/trt_pose/plugins/find_peaks.cpp @@ -1,78 +1,77 @@ #include "find_peaks.hpp" +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) -void find_peaks_out(torch::Tensor counts, torch::Tensor peaks, torch::Tensor input, float threshold, int window_size, int max_count) -{ - auto counts_a = counts.accessor(); - auto peaks_a = peaks.accessor(); - auto input_a = input.accessor(); - - int w = window_size / 2; - int width = input.size(3); - int height = input.size(2); - - for (int b = 0; b < input.size(0); b++) - { - for (int c = 0; c < input.size(1); c++) - { - int count = 0; - auto peaks_a_bc = peaks_a[b][c]; - auto input_a_bc = input_a[b][c]; - - for (int i = 0; i < height && count < max_count; i++) - { - for (int j = 0; j < width && count < max_count; j++) - { - float value = input_a_bc[i][j]; - - if (value < threshold) - continue; - - int ii_min = i - w; - int jj_min = j - w; - int ii_max = i + w + 1; - int jj_max = j + w + 1; - - if (ii_min < 0) ii_min = 0; - if (ii_max > height) ii_max = height; - if (jj_min < 0) jj_min = 0; - if (jj_max > width) jj_max = width; - - // get max - bool is_peak = true; - for (int ii = ii_min; ii < ii_max; ii++) - { - for (int jj = jj_min; jj < jj_max; jj++) - { - if (input_a_bc[ii][jj] > value) { - is_peak = false; - } - } - } - - if (is_peak) { - peaks_a_bc[count][0] = i; - peaks_a_bc[count][1] = j; - count++; - } - } - } - - counts_a[b][c] = count; +void find_peaks_out_hw(int *counts, // 1 + int *peaks, // Mx2 + const float *input, // HxW + const int H, const int W, const int M, + const float threshold, const int window_size) { + int win = window_size / 2; + int count = 0; + + for (int i = 0; i < H && count < M; i++) { + for (int j = 0; j < W && count < M; j++) { + float val = input[i * W + j]; + + // skip if below threshold + if (val < threshold) + continue; + + // compute window bounds + int ii_min = MAX(i - win, 0); + int jj_min = MAX(j - win, 0); + int ii_max = MIN(i + win + 1, H); + int jj_max = MIN(j + win + 1, W); + + // search for larger value in window + bool is_peak = true; + for (int ii = ii_min; ii < ii_max; ii++) { + for (int jj = jj_min; jj < jj_max; jj++) { + if (input[ii * W + jj] > val) { + is_peak = false; + } } + } + + // add peak + if (is_peak) { + peaks[count * 2] = i; + peaks[count * 2 + 1] = j; + count++; + } } + } + + *counts = count; +} + +void find_peaks_out_chw(int *counts, // C + int *peaks, // CxMx2 + const float *input, // CxHxW + const int C, const int H, const int W, const int M, + const float threshold, const int window_size) { + for (int c = 0; c < C; c++) { + int *counts_c = &counts[c]; + int *peaks_c = &peaks[c * M * 2]; + const float *input_c = &input[c * H * W]; + find_peaks_out_hw(counts_c, peaks_c, input_c, H, W, M, threshold, + window_size); + } } -std::vector find_peaks(torch::Tensor input, float threshold, int window_size, int max_count) -{ - auto options = 
torch::TensorOptions()
-        .dtype(torch::kInt32)
-        .layout(torch::kStrided)
-        .device(torch::kCPU)
-        .requires_grad(false);
-    
-    auto counts = torch::zeros({input.size(0), input.size(1)}, options);
-    auto peaks = torch::zeros({input.size(0), input.size(1), max_count, 2}, options);   // valid, i, j
-    find_peaks_out(counts, peaks, input, threshold, window_size, max_count);
-    return {counts, peaks};
-}
\ No newline at end of file
+void find_peaks_out_nchw(int *counts,        // NxC
+                         int *peaks,         // NxCxMx2
+                         const float *input, // NxCxHxW
+                         const int N, const int C, const int H, const int W,
+                         const int M, const float threshold,
+                         const int window_size) {
+  for (int n = 0; n < N; n++) {
+    int *counts_n = &counts[n * C];
+    int *peaks_n = &peaks[n * C * M * 2];
+    const float *input_n = &input[n * C * H * W];
+    find_peaks_out_chw(counts_n, peaks_n, input_n, C, H, W, M, threshold,
+                       window_size);
+  }
+}
diff --git a/trt_pose/plugins/find_peaks.hpp b/trt_pose/plugins/find_peaks.hpp
index e489051..f596bdd 100644
--- a/trt_pose/plugins/find_peaks.hpp
+++ b/trt_pose/plugins/find_peaks.hpp
@@ -1,6 +1,20 @@
-#include <torch/extension.h>
-#include <vector>
+#pragma once
 
+void find_peaks_out_hw(int *counts,        // 1
+                       int *peaks,         // Mx2
+                       const float *input, // HxW
+                       const int H, const int W, const int M,
+                       const float threshold, const int window_size);
 
-void find_peaks_out(torch::Tensor counts, torch::Tensor peaks, torch::Tensor input, float threshold, int window_size, int max_count);
-std::vector<torch::Tensor> find_peaks(torch::Tensor input, float threshold, int window_size, int max_count);
\ No newline at end of file
+void find_peaks_out_chw(int *counts,        // C
+                        int *peaks,         // CxMx2
+                        const float *input, // CxHxW
+                        const int C, const int H, const int W, const int M,
+                        const float threshold, const int window_size);
+
+void find_peaks_out_nchw(int *counts,        // NxC
+                         int *peaks,         // NxCxMx2
+                         const float *input, // NxCxHxW
+                         const int N, const int C, const int H, const int W,
+                         const int M, const float threshold,
+                         const int window_size);
diff --git a/trt_pose/plugins/plugins.cpp b/trt_pose/plugins/plugins.cpp
index 720fc8d..9c0f82f 100644
--- a/trt_pose/plugins/plugins.cpp
+++ b/trt_pose/plugins/plugins.cpp
@@ -1,16 +1,61 @@
-#include <torch/extension.h>
-#include "find_peaks.hpp"
-#include "paf_score_graph.hpp"
-#include "refine_peaks.hpp"
-#include "munkres.hpp"
 #include "connect_parts.hpp"
+#include "find_peaks.hpp"
 #include "generate_cmap.hpp"
 #include "generate_paf.hpp"
+#include "munkres.hpp"
+#include "paf_score_graph.hpp"
+#include "refine_peaks.hpp"
+#include <torch/extension.h>
+#include <vector>
+
+void find_peaks_out_torch(torch::Tensor counts, torch::Tensor peaks,
+                          torch::Tensor input, const float threshold,
+                          const int window_size, const int max_count) {
+  const int N = input.size(0);
+  const int C = input.size(1);
+  const int H = input.size(2);
+  const int W = input.size(3);
+  const int M = max_count;
+
+  // get pointers to tensor data
+  int *counts_ptr = (int *)counts.data_ptr();
+  int *peaks_ptr = (int *)peaks.data_ptr();
+  const float *input_ptr = (const float *)input.data_ptr();
+
+  // find peaks
+  find_peaks_out_nchw(counts_ptr, peaks_ptr, input_ptr, N, C, H, W, M,
+                      threshold, window_size);
+}
+
+std::vector<torch::Tensor> find_peaks_torch(torch::Tensor input,
+                                            const float threshold,
+                                            const int window_size,
+                                            const int max_count) {
+  auto options = torch::TensorOptions()
+                     .dtype(torch::kInt32)
+                     .layout(torch::kStrided)
+                     .device(torch::kCPU)
+                     .requires_grad(false);
+
+  const int N = input.size(0);
+  const int C = input.size(1);
+  const int H = input.size(2);
+  const int W = input.size(3);
+  const int M = 
max_count; + + // create output tensors + auto counts = torch::zeros({N, C}, options); + auto peaks = torch::zeros({N, C, M, 2}, options); + + // find peaks + find_peaks_out_torch(counts, peaks, input, threshold, window_size, max_count); + return {counts, peaks}; +} PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("find_peaks", &find_peaks, "find_peaks"); - m.def("find_peaks_out", &find_peaks_out, "find_peaks_out"); + m.def("find_peaks", &find_peaks_torch, "find_peaks"); + m.def("find_peaks_out", &find_peaks_out_torch, "find_peaks_out"); m.def("paf_score_graph", &paf_score_graph, "paf_score_graph"); m.def("paf_score_graph_out", &paf_score_graph_out, "paf_score_graph_out"); m.def("refine_peaks", &refine_peaks, "refine_peaks"); @@ -21,4 +66,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("assignment_out", &assignment_out, "assignment_out"); m.def("generate_cmap", &generate_cmap, "generate_cmap"); m.def("generate_paf", &generate_paf, "generate_paf"); -} \ No newline at end of file +} diff --git a/trt_pose/plugins/test_all.cpp b/trt_pose/plugins/test_all.cpp new file mode 100644 index 0000000..aaab2bd --- /dev/null +++ b/trt_pose/plugins/test_all.cpp @@ -0,0 +1,48 @@ +#include "find_peaks.hpp" +#include + + +void test_find_peaks_out_hw() +{ + const int N = 1; + const int C = 2; + const int H = 4; + const int W = 4; + const int M = 10; + const float threshold = 2.0; + const int window_size = 3; + + int counts[N * C]; + int peaks[N * C * M * 2]; + float input[N * C * H * W] = { + + 0., 0., 0., 0., + 0., 0., 3., 0., + 0., 0., 0., 0., + 1., 0., 0., 0., + + 0., 0., 0., 0., + 0., 0., 0., 0., + 0., 0., 0., 0., + 0., 0., 0., 0. + + }; + + find_peaks_out_nchw(counts, peaks, input, N, C, H, W, M, threshold, window_size); + + if (counts[0] != 1) { + throw std::runtime_error("Number of peaks should be 1."); + } + if (peaks[0] != 1) { + throw std::runtime_error("Peak i coordinate should be 1."); + } + if (peaks[1] != 2) { + throw std::runtime_error("Peak j coordinate should be 2,"); + } +} + +int main() +{ + test_find_peaks_out_hw(); + return 0; +} From daceb807b236d6a4a4625e262eecde65e0519122 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 14:26:04 -0800 Subject: [PATCH 02/16] trt_pose cpp --- trt_pose/plugins/refine_peaks.cpp | 140 +++++++++++++++--------------- trt_pose/plugins/refine_peaks.hpp | 40 ++++++++- trt_pose/plugins/test_all.cpp | 58 ++++++++++--- 3 files changed, 149 insertions(+), 89 deletions(-) diff --git a/trt_pose/plugins/refine_peaks.cpp b/trt_pose/plugins/refine_peaks.cpp index 0aa637b..44313c1 100644 --- a/trt_pose/plugins/refine_peaks.cpp +++ b/trt_pose/plugins/refine_peaks.cpp @@ -1,78 +1,76 @@ #include "refine_peaks.hpp" +inline int reflect(int idx, int min, int max) { + if (idx < min) { + return -idx; + } else if (idx >= max) { + return max - (idx - max) - 2; + } else { + return idx; + } +} + +void refine_peaks_out_hw(float *refined_peaks, // Mx2 + const int *counts, // 1 + const int *peaks, // Mx2 + const float *cmap, // HxW + const int H, const int W, const int M, + const int window_size) { + int count = *counts; + int win = window_size / 2; + + for (int m = 0; m < count; m++) { + float *refined_peak = &refined_peaks[m * 2]; + refined_peak[0] = 0.; + refined_peak[1] = 0.; + const int *peak = &peaks[m * 2]; + + int i = peak[0]; + int j = peak[1]; + float weight_sum = 0.; -void refine_peaks_out(torch::Tensor refined_peaks, torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size) -{ - auto refined_peaks_a = refined_peaks.accessor(); - 
auto counts_a = counts.accessor(); - auto peaks_a = peaks.accessor(); - auto cmap_a = cmap.accessor(); - - int w = window_size / 2; - int width = cmap.size(3); - int height = cmap.size(2); - - for (int b = 0; b < cmap.size(0); b++) - { - for (int c = 0; c < cmap.size(1); c++) - { - int count = counts_a[b][c]; - auto refined_peaks_a_bc = refined_peaks_a[b][c]; - auto peaks_a_bc = peaks_a[b][c]; - auto cmap_a_bc = cmap_a[b][c]; - - for (int p = 0; p < count; p++) - { - auto refined_peak = refined_peaks_a_bc[p]; - auto peak = peaks_a_bc[p]; - - int i = peak[0]; - int j = peak[1]; - float weight_sum = 0.0f; - - for (int ii = i - w; ii < i + w + 1; ii++) - { - int ii_idx = ii; - - // reflect index at border - if (ii < 0) ii_idx = -ii; - else if (ii >= height) ii_idx = height - (ii - height) - 2; - - for (int jj = j - w; jj < j + w + 1; jj++) - { - int jj_idx = jj; + for (int ii = i - win; ii < i + win + 1; ii++) { + int ii_idx = reflect(ii, 0, H); + for (int jj = j - win; jj < j + win + 1; jj++) { + int jj_idx = reflect(jj, 0, W); - // reflect index at border - if (jj < 0) jj_idx = -jj; - else if (jj >= width) jj_idx = width - (jj - width) - 2; - - float weight = cmap_a_bc[ii_idx][jj_idx]; - refined_peak[0] += weight * ii; - refined_peak[1] += weight * jj; - weight_sum += weight; - } - } - - refined_peak[0] /= weight_sum; - refined_peak[1] /= weight_sum; - refined_peak[0] += 0.5; - refined_peak[1] += 0.5; - refined_peak[0] /= height; - refined_peak[1] /= width; - } - } + float weight = cmap[ii_idx * W + jj_idx]; + refined_peak[0] += weight * ii; + refined_peak[1] += weight * jj; + weight_sum += weight; + } } + + refined_peak[0] /= weight_sum; + refined_peak[1] /= weight_sum; + refined_peak[0] += 0.5; // center pixel + refined_peak[1] += 0.5; // center pixel + refined_peak[0] /= H; // normalize coordinates + refined_peak[1] /= W; // normalize coordinates + } +} + +void refine_peaks_out_chw(float *refined_peaks, // CxMx2 + const int *counts, // C + const int *peaks, // CxMx2 + const float *cmap, const int C, const int H, + const int W, const int M, const int window_size) { + for (int c = 0; c < C; c++) { + refine_peaks_out_hw(&refined_peaks[c * M * 2], &counts[c], + &peaks[c * M * 2], &cmap[c * H * W], H, W, M, + window_size); + } } -torch::Tensor refine_peaks(torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size) -{ - auto options = torch::TensorOptions() - .dtype(torch::kFloat32) - .layout(torch::kStrided) - .device(torch::kCPU) - .requires_grad(false); - - auto refined_peaks = torch::zeros({peaks.size(0), peaks.size(1), peaks.size(2), peaks.size(3)}, options); - refine_peaks_out(refined_peaks, counts, peaks, cmap, window_size); - return refined_peaks; -} \ No newline at end of file +void refine_peaks_out_nchw(float *refined_peaks, // NxCxMx2 + const int *counts, // NxC + const int *peaks, // NxCxMx2 + const float *cmap, const int N, const int C, + const int H, const int W, const int M, + const int window_size) { + for (int n = 0; n < N; n++) { + refine_peaks_out_chw(&refined_peaks[n * C * M * 2], &counts[n * C], + &peaks[n * C * M * 2], &cmap[n * C * H * W], C, H, W, + M, window_size); + } +} diff --git a/trt_pose/plugins/refine_peaks.hpp b/trt_pose/plugins/refine_peaks.hpp index 0009d94..ddd86fe 100644 --- a/trt_pose/plugins/refine_peaks.hpp +++ b/trt_pose/plugins/refine_peaks.hpp @@ -1,6 +1,38 @@ -#include -#include +#pragma once +void refine_peaks_out_hw( + float *refined_peaks, + const int *counts, + const int *peaks, + const float *cmap, + const int H, + const int 
W, + const int M, + const int window_size +); -void refine_peaks_out(torch::Tensor refined_peaks, torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size); -torch::Tensor refine_peaks(torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size); \ No newline at end of file +void refine_peaks_out_chw( + float *refined_peaks, + const int *counts, + const int *peaks, + const float *cmap, + const int C, + const int H, + const int W, + const int M, + const int window_size +); + + +void refine_peaks_out_nchw( + float *refined_peaks, + const int *counts, + const int *peaks, + const float *cmap, + const int N, + const int C, + const int H, + const int W, + const int M, + const int window_size +); diff --git a/trt_pose/plugins/test_all.cpp b/trt_pose/plugins/test_all.cpp index aaab2bd..190dc5c 100644 --- a/trt_pose/plugins/test_all.cpp +++ b/trt_pose/plugins/test_all.cpp @@ -1,36 +1,31 @@ -#include "find_peaks.hpp" #include +#include "find_peaks.hpp" +#include "refine_peaks.hpp" +#define ABS(x) ((x) > 0 ? (x) : (-x)) void test_find_peaks_out_hw() { - const int N = 1; - const int C = 2; const int H = 4; const int W = 4; const int M = 10; const float threshold = 2.0; const int window_size = 3; - int counts[N * C]; - int peaks[N * C * M * 2]; - float input[N * C * H * W] = { + int counts; + int peaks[M * 2]; + const float input[H * W] = { 0., 0., 0., 0., 0., 0., 3., 0., 0., 0., 0., 0., - 1., 0., 0., 0., - - 0., 0., 0., 0., - 0., 0., 0., 0., - 0., 0., 0., 0., - 0., 0., 0., 0. + 1., 0., 0., 0. }; - find_peaks_out_nchw(counts, peaks, input, N, C, H, W, M, threshold, window_size); + find_peaks_out_hw(&counts, peaks, input, H, W, M, threshold, window_size); - if (counts[0] != 1) { + if (counts != 1) { throw std::runtime_error("Number of peaks should be 1."); } if (peaks[0] != 1) { @@ -41,8 +36,43 @@ void test_find_peaks_out_hw() } } +void test_refined_peaks_out_hw() +{ + const int H = 4; + const int W = 4; + const int M = 1; + const int window_size = 3; + + const int counts = 1; + const int peaks[M * 2] = { 1, 2 }; + const float cmap[H * W] = { + + 0., 0., 1., 0., + 0., 2., 3., 1., + 0., 0., 2., 0., + 0., 0., 0., 0. + + }; + const float i_true = (0.5 + (1. * 0 + 2. * 1 + 3. * 1 + 1. * 1 + 2. * 2) / 9.) / H; + const float j_true = (0.5 + (2. * 1 + 1. * 2 + 3. * 2 + 2. * 2 + 1. * 3) / 9.) 
/ W; + const float tolerance = 1e-5; + + float refined_peaks[M * 2]; + + refine_peaks_out_hw(refined_peaks, &counts, peaks, cmap, H, W, M, window_size); + + if (ABS(refined_peaks[0] - i_true) > tolerance) { + throw std::runtime_error("i coordinate incorrect"); + } + if (ABS(refined_peaks[1] - j_true) > tolerance) { + throw std::runtime_error("j coordinate incorrect"); + } + +} + int main() { test_find_peaks_out_hw(); + test_refined_peaks_out_hw(); return 0; } From 6d3b3fc7b7594c66bb8ffd76b40cc52cac2bce72 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 14:49:30 -0800 Subject: [PATCH 03/16] refine peaks cpp --- trt_pose/plugins/plugins.cpp | 39 ++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/trt_pose/plugins/plugins.cpp b/trt_pose/plugins/plugins.cpp index 9c0f82f..5985580 100644 --- a/trt_pose/plugins/plugins.cpp +++ b/trt_pose/plugins/plugins.cpp @@ -53,13 +53,48 @@ std::vector find_peaks_torch(torch::Tensor input, return {counts, peaks}; } +void refine_peaks_out_torch(torch::Tensor refined_peaks, torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size) +{ + const int N = cmap.size(0); + const int C = cmap.size(1); + const int H = cmap.size(2); + const int W = cmap.size(3); + const int M = peaks.size(2); + + refine_peaks_out_nchw( + (float *) refined_peaks.data_ptr(), + (const int*) counts.data_ptr(), + (const int*) peaks.data_ptr(), + (const float*) cmap.data_ptr(), + N, + C, + H, + W, + M, + window_size + ); +} + +torch::Tensor refine_peaks_torch(torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size) +{ + auto options = torch::TensorOptions() + .dtype(torch::kFloat32) + .layout(torch::kStrided) + .device(torch::kCPU) + .requires_grad(false); + + auto refined_peaks = torch::zeros({peaks.size(0), peaks.size(1), peaks.size(2), peaks.size(3)}, options); + refine_peaks_out_torch(refined_peaks, counts, peaks, cmap, window_size); + return refined_peaks; +} + PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("find_peaks", &find_peaks_torch, "find_peaks"); m.def("find_peaks_out", &find_peaks_out_torch, "find_peaks_out"); m.def("paf_score_graph", &paf_score_graph, "paf_score_graph"); m.def("paf_score_graph_out", &paf_score_graph_out, "paf_score_graph_out"); - m.def("refine_peaks", &refine_peaks, "refine_peaks"); - m.def("refine_peaks_out", &refine_peaks_out, "refine_peaks_out"); + m.def("refine_peaks", &refine_peaks_torch, "refine_peaks"); + m.def("refine_peaks_out", &refine_peaks_out_torch, "refine_peaks_out"); m.def("munkres", &munkres, "munkres"); m.def("connect_parts", &connect_parts, "connect_parts"); m.def("assignment", &assignment, "assignment"); From 1a7ea4fa342dbe571c066106f9d13adeb7751920 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 14:51:35 -0800 Subject: [PATCH 04/16] reformatted refine peaks header --- trt_pose/plugins/refine_peaks.hpp | 51 +++++++++++-------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/trt_pose/plugins/refine_peaks.hpp b/trt_pose/plugins/refine_peaks.hpp index ddd86fe..fe13d5e 100644 --- a/trt_pose/plugins/refine_peaks.hpp +++ b/trt_pose/plugins/refine_peaks.hpp @@ -1,38 +1,21 @@ #pragma once -void refine_peaks_out_hw( - float *refined_peaks, - const int *counts, - const int *peaks, - const float *cmap, - const int H, - const int W, - const int M, - const int window_size -); +void refine_peaks_out_hw(float *refined_peaks, // Mx2 + const int *counts, // 1 + const int *peaks, // Mx2 + const float *cmap, // 
HxW + const int H, const int W, const int M, + const int window_size); -void refine_peaks_out_chw( - float *refined_peaks, - const int *counts, - const int *peaks, - const float *cmap, - const int C, - const int H, - const int W, - const int M, - const int window_size -); +void refine_peaks_out_chw(float *refined_peaks, // CxMx2 + const int *counts, // C + const int *peaks, // CxMx2 + const float *cmap, const int C, const int H, + const int W, const int M, const int window_size); - -void refine_peaks_out_nchw( - float *refined_peaks, - const int *counts, - const int *peaks, - const float *cmap, - const int N, - const int C, - const int H, - const int W, - const int M, - const int window_size -); +void refine_peaks_out_nchw(float *refined_peaks, // NxCxMx2 + const int *counts, // NxC + const int *peaks, // NxCxMx2 + const float *cmap, const int N, const int C, + const int H, const int W, const int M, + const int window_size); From fe9ad40d8c8ef079b0d188d5e632b60750131370 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 14:53:06 -0800 Subject: [PATCH 05/16] reformat --- .ipynb_checkpoints/README-checkpoint.md | 72 +++++++++++++++++++++++++ trt_pose/plugins/plugins.cpp | 40 ++++++-------- 2 files changed, 89 insertions(+), 23 deletions(-) create mode 100644 .ipynb_checkpoints/README-checkpoint.md diff --git a/.ipynb_checkpoints/README-checkpoint.md b/.ipynb_checkpoints/README-checkpoint.md new file mode 100644 index 0000000..2b172e2 --- /dev/null +++ b/.ipynb_checkpoints/README-checkpoint.md @@ -0,0 +1,72 @@ +# TensorRT Pose Estimation + +This project features multi-instance pose estimation accelerated by NVIDIA TensorRT. It is ideal for applications where low latency is necessary. It includes + +- Training scripts to train on any keypoint task data in MSCOCO format + +- A collection of models that may be easily optimized with TensorRT using [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt) + +This project can be used easily for the task of human pose estimation, or extended for something new. + +If you run into any issues please [let us know](../../issues). + +## Tasks + +### Human pose estimation + + + +This task involves detecting human body pose using models trained on the MSCOCO dataset. + +#### Models + +Below are models pre-trained on the MSCOCO dataset. The throughput in FPS is shown for each platform + +| Model | Jetson Nano | Jetson Xavier | Weights | +|-------|-------------|---------------|---------| +| resnet18_baseline_att_224x224_A | 22 | 251 | [download (81MB)](https://drive.google.com/open?id=1XYDdCUdiF2xxx4rznmLb62SdOUZuoNbd) | +| densenet121_baseline_att_256x256_B | 12 | 101 | [download (84MB)](https://drive.google.com/open?id=13FkJkx7evQ1WwP54UmdiDXWyFMY1OxDU) | + + +#### Live demo + +To run the live Jupyter Notebook demo on real-time camera input, follow these steps + +1. Place the downloaded weights in the [tasks/human_pose](tasks/human_pose) directory + +2. 
Open and follow the [live_demo.ipynb](tasks/human_pose/live_demo.ipynb) notebook + + > You may need to modify the notebook, depending on which model you use + +## Setup + +To install trt_pose, call this command + +> We assume you have already installed PyTorch, torchvision, and TensorRT + +```bash +sudo pip3 install tqdm cython pycocotools +sudo apt-get install python3-matplotlib +git clone https://github.com/NVIDIA-AI-IOT/trt_pose +cd trt_pose +sudo python3 setup.py install +``` + +## See also + +- [torch2trt](http://github.com/NVIDIA-AI-IOT/torch2trt) - An easy to use PyTorch to TensorRT converter + +- [JetBot](http://github.com/NVIDIA-AI-IOT/jetbot) - An educational AI robot based on NVIDIA Jetson Nano +- [JetRacer](http://github.com/NVIDIA-AI-IOT/jetracer) - An educational AI racecar using NVIDIA Jetson Nano +- [JetCam](http://github.com/NVIDIA-AI-IOT/jetcam) - An easy to use Python camera interface for NVIDIA Jetson + +## References + +Cao, Zhe, et al. "Realtime multi-person 2d pose estimation using part affinity fields." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017. + +Xiao, Bin, Haiping Wu, and Yichen Wei. "Simple baselines for human pose estimation and tracking." Proceedings of the European Conference on Computer Vision (ECCV). 2018. diff --git a/trt_pose/plugins/plugins.cpp b/trt_pose/plugins/plugins.cpp index 5985580..ae3de67 100644 --- a/trt_pose/plugins/plugins.cpp +++ b/trt_pose/plugins/plugins.cpp @@ -53,8 +53,9 @@ std::vector find_peaks_torch(torch::Tensor input, return {counts, peaks}; } -void refine_peaks_out_torch(torch::Tensor refined_peaks, torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size) -{ +void refine_peaks_out_torch(torch::Tensor refined_peaks, torch::Tensor counts, + torch::Tensor peaks, torch::Tensor cmap, + int window_size) { const int N = cmap.size(0); const int C = cmap.size(1); const int H = cmap.size(2); @@ -62,30 +63,23 @@ void refine_peaks_out_torch(torch::Tensor refined_peaks, torch::Tensor counts, t const int M = peaks.size(2); refine_peaks_out_nchw( - (float *) refined_peaks.data_ptr(), - (const int*) counts.data_ptr(), - (const int*) peaks.data_ptr(), - (const float*) cmap.data_ptr(), - N, - C, - H, - W, - M, - window_size - ); + (float *)refined_peaks.data_ptr(), (const int *)counts.data_ptr(), + (const int *)peaks.data_ptr(), (const float *)cmap.data_ptr(), N, C, H, W, + M, window_size); } -torch::Tensor refine_peaks_torch(torch::Tensor counts, torch::Tensor peaks, torch::Tensor cmap, int window_size) -{ +torch::Tensor refine_peaks_torch(torch::Tensor counts, torch::Tensor peaks, + torch::Tensor cmap, int window_size) { auto options = torch::TensorOptions() - .dtype(torch::kFloat32) - .layout(torch::kStrided) - .device(torch::kCPU) - .requires_grad(false); - - auto refined_peaks = torch::zeros({peaks.size(0), peaks.size(1), peaks.size(2), peaks.size(3)}, options); - refine_peaks_out_torch(refined_peaks, counts, peaks, cmap, window_size); - return refined_peaks; + .dtype(torch::kFloat32) + .layout(torch::kStrided) + .device(torch::kCPU) + .requires_grad(false); + + auto refined_peaks = torch::zeros( + {peaks.size(0), peaks.size(1), peaks.size(2), peaks.size(3)}, options); + refine_peaks_out_torch(refined_peaks, counts, peaks, cmap, window_size); + return refined_peaks; } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { From 793708a2d3eec3c767a6967f5cd7b6bfcd74f7e0 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 16:06:16 -0800 Subject: [PATCH 06/16] paf_score_graph --- 
trt_pose/plugins/paf_score_graph.cpp | 182 ++++++++++----------------- trt_pose/plugins/paf_score_graph.hpp | 14 ++- trt_pose/plugins/test_all.cpp | 53 ++++++++ 3 files changed, 130 insertions(+), 119 deletions(-) diff --git a/trt_pose/plugins/paf_score_graph.cpp b/trt_pose/plugins/paf_score_graph.cpp index ed9dde5..fbccf54 100644 --- a/trt_pose/plugins/paf_score_graph.cpp +++ b/trt_pose/plugins/paf_score_graph.cpp @@ -1,120 +1,74 @@ #include "paf_score_graph.hpp" #include -#define EPS 1e-6 +#define EPS 1e-5 -void paf_score_graph_out(torch::Tensor score_graph, torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, int num_integral_samples) -{ - int N = paf.size(0); - int K = topology.size(0); - int M = peaks.size(2); - int H = paf.size(2); - int W = paf.size(3); - - auto score_graph_a = score_graph.accessor(); - auto paf_a = paf.accessor(); - auto topology_a = topology.accessor(); - auto counts_acc = counts.accessor(); - auto peaks_acc = peaks.accessor(); - - for (int n = 0; n < N; n++) - { - for (int k = 0; k < K; k++) - { - auto score_graph_nk = score_graph_a[n][k]; - auto paf_i_idx = topology_a[k][0]; - auto paf_j_idx = topology_a[k][1]; - auto cmap_a_idx = topology_a[k][2]; - auto cmap_b_idx = topology_a[k][3]; - auto paf_i = paf_a[n][paf_i_idx]; - auto paf_j = paf_a[n][paf_j_idx]; - - auto counts_a = counts_acc[n][cmap_a_idx]; - auto counts_b = counts_acc[n][cmap_b_idx]; - auto peaks_a = peaks_acc[n][cmap_a_idx]; - auto peaks_b = peaks_acc[n][cmap_b_idx]; - - for (int a = 0; a < counts_a; a++) - { - // compute point A - float pa_i = peaks_a[a][0] * H; - float pa_j = peaks_a[a][1] * W; - - for (int b = 0; b < counts_b; b++) - { - // compute point B - float pb_i = peaks_b[b][0] * H; - float pb_j = peaks_b[b][1] * W; - - // compute vector A->B - float pab_i = pb_i - pa_i; - float pab_j = pb_j - pa_j; - - // compute normalized vector A->B - float pab_norm = sqrtf(pab_i * pab_i + pab_j * pab_j) + EPS; - float uab_i = pab_i / pab_norm; - float uab_j = pab_j / pab_norm; - - float integral = 0.0; - float progress = 0.0; - float increment = 1.0f / num_integral_samples; - - for (int t = 0; t < num_integral_samples; t++) - { - // compute integral point T - float progress = (float) t / (float) num_integral_samples; - float pt_i = pa_i + progress * pab_i; //(1.0 - progress) * pa_i + progress * pb_i; - float pt_j = pa_j + progress * pab_j;//(1.0 - progress) * pa_j + progress * pb_j; - - // convert to int - int pt_i_int = (int) pt_i; - int pt_j_int = (int) pt_j; - - // skip point if out of bounds (will weaken integral) - if (pt_i_int < 0) continue; - if (pt_i_int > H) continue; - if (pt_j_int < 0) continue; - if (pt_j_int > W) continue; - - // get vector at integral point from PAF - float pt_paf_i = paf_i[pt_i_int][pt_j_int]; - float pt_paf_j = paf_j[pt_i_int][pt_j_int]; - - // compute dot product of normalized A->B with PAF vector at integral point - float dot = pt_paf_i * uab_i + pt_paf_j * uab_j; - integral += dot; - - progress += increment; - } - - // normalize integral by number of samples - integral /= num_integral_samples; - score_graph_nk[a][b] = integral; - } - } - } - } -} +void paf_score_graph_out_hw(float *score_graph, // MxM + const float *paf_i, // HxW + const float *paf_j, // HxW + const int counts_a, const int counts_b, + const float *peaks_a, // Mx2 + const float *peaks_b, // Mx2 + const int H, const int W, const int M, + const int num_integral_samples) { + for (int a = 0; a < counts_a; a++) { + // compute point A + float pa_i = peaks_a[a * 2] * H; + 
float pa_j = peaks_a[a * 2 + 1] * W; + + for (int b = 0; b < counts_b; b++) { + // compute point B + float pb_i = peaks_b[b * 2] * H; + float pb_j = peaks_b[b * 2 + 1] * W; + + // compute vector A->B + float pab_i = pb_i - pa_i; + float pab_j = pb_j - pa_j; + + // compute normalized vector A->B + float pab_norm = sqrtf(pab_i * pab_i + pab_j * pab_j) + EPS; + float uab_i = pab_i / pab_norm; + float uab_j = pab_j / pab_norm; + + float integral = 0.; + float progress = 0.; + float increment = 1.f / num_integral_samples; -// paf = Nx(2*K)xHxW -// topology = Kx4 --> (paf_i_idx, paf_j_idx, cmap_a_idx, cmap_b_idx) -// counts = NxC -// peaks = NxCxMx2 -// score_graph = NxKxMxM + for (int t = 0; t < num_integral_samples; t++) { + // compute integral point T + float progress = (float)t / (float)num_integral_samples; + float pt_i = pa_i + progress * pab_i; + float pt_j = pa_j + progress * pab_j; -torch::Tensor paf_score_graph(torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, int num_integral_samples) -{ - auto options = torch::TensorOptions() - .dtype(torch::kFloat32) - .layout(torch::kStrided) - .device(torch::kCPU) - .requires_grad(false); - - int N = peaks.size(0); - int K = topology.size(0); - int M = peaks.size(2); - - auto score_graph = torch::zeros({N, K, M, M}, options); - paf_score_graph_out(score_graph, paf, topology, counts, peaks, num_integral_samples); - return score_graph; -} \ No newline at end of file + // convert to int + // note: we do not need to subtract 0.5 when indexing, because + // round(x - 0.5) = int(x) + int pt_i_int = (int)pt_i; + int pt_j_int = (int)pt_j; + + // skip point if out of bounds (will weaken integral) + if (pt_i_int < 0) + continue; + if (pt_i_int >= H) + continue; + if (pt_j_int < 0) + continue; + if (pt_j_int >= W) + continue; + + // get vector at integral point from PAF + float pt_paf_i = paf_i[pt_i_int * W + pt_j_int]; + float pt_paf_j = paf_j[pt_i_int * W + pt_j_int]; + + // compute dot product of normalized A->B with PAF vector at integral + // point + float dot = pt_paf_i * uab_i + pt_paf_j * uab_j; + integral += dot; + progress += increment; + } + + integral /= num_integral_samples; + score_graph[a * M + b] = integral; + } + } +} diff --git a/trt_pose/plugins/paf_score_graph.hpp b/trt_pose/plugins/paf_score_graph.hpp index 8a3db79..c4f1f5d 100644 --- a/trt_pose/plugins/paf_score_graph.hpp +++ b/trt_pose/plugins/paf_score_graph.hpp @@ -1,6 +1,10 @@ -#include -#include +#pragma once - -void paf_score_graph_out(torch::Tensor score_graph, torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, int num_integral_samples); -torch::Tensor paf_score_graph(torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, int num_integral_samples); \ No newline at end of file +void paf_score_graph_out_hw(float *score_graph, // MxM + const float *paf_i, // HxW + const float *paf_j, // HxW + const int counts_a, const int counts_b, + const float *peaks_a, // Mx2 + const float *peaks_b, // Mx2 + const int H, const int W, const int M, + const int num_integral_samples); diff --git a/trt_pose/plugins/test_all.cpp b/trt_pose/plugins/test_all.cpp index 190dc5c..a68194e 100644 --- a/trt_pose/plugins/test_all.cpp +++ b/trt_pose/plugins/test_all.cpp @@ -1,4 +1,5 @@ #include +#include "paf_score_graph.hpp" #include "find_peaks.hpp" #include "refine_peaks.hpp" @@ -67,12 +68,64 @@ void test_refined_peaks_out_hw() if (ABS(refined_peaks[1] - j_true) > tolerance) { throw std::runtime_error("j 
coordinate incorrect"); } +} +void test_paf_score_graph_hw() +{ + const int M = 2; + const int H = 4; + const int W = 4; + const int counts_a = 2; + const int counts_b = 2; + const int num_integral_samples = 3; + + float score_graph[M * M]; + + // test points + // + // _ _ _ b + // _ _ _ | + // a - b | + // _ _ _ a + + const float paf_i[H * W] = { + 0., 0., 0., -1., + 0., 0., 0., -1., + 0., 0., 0., -1., + 0., 0., 0., -1. + }; + const float paf_j[H * W] = { + 0., 0., 0., 0., + 0., 0., 0., 0., + 1., 1., 1., 0., + 0., 0., 0., 0. + }; + const float peaks_a[M * 2] = { + 0.625, 0.125, // mid-left + 0.875, 0.875 // bot-right + }; + const float peaks_b[M * 2] = { + 0.625, 0.625, // mid-mid + 0.125, 0.875 // top-right + }; + + paf_score_graph_out_hw( + score_graph, + paf_i, + paf_j, + counts_a, + counts_b, + peaks_a, + peaks_b, + H, W, M, + num_integral_samples + ); } int main() { test_find_peaks_out_hw(); test_refined_peaks_out_hw(); + test_paf_score_graph_hw(); return 0; } From 67d230f9ee5e9bb7ce3fc30235d8d67877bb0524 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 16:29:15 -0800 Subject: [PATCH 07/16] paf batch/k methods --- trt_pose/plugins/paf_score_graph.cpp | 57 ++++++++++++++++++++++++---- trt_pose/plugins/paf_score_graph.hpp | 31 +++++++++++---- 2 files changed, 74 insertions(+), 14 deletions(-) diff --git a/trt_pose/plugins/paf_score_graph.cpp b/trt_pose/plugins/paf_score_graph.cpp index fbccf54..5e1ead4 100644 --- a/trt_pose/plugins/paf_score_graph.cpp +++ b/trt_pose/plugins/paf_score_graph.cpp @@ -4,13 +4,13 @@ #define EPS 1e-5 void paf_score_graph_out_hw(float *score_graph, // MxM - const float *paf_i, // HxW - const float *paf_j, // HxW - const int counts_a, const int counts_b, - const float *peaks_a, // Mx2 - const float *peaks_b, // Mx2 - const int H, const int W, const int M, - const int num_integral_samples) { + const float *paf_i, // HxW + const float *paf_j, // HxW + const int counts_a, const int counts_b, + const float *peaks_a, // Mx2 + const float *peaks_b, // Mx2 + const int H, const int W, const int M, + const int num_integral_samples) { for (int a = 0; a < counts_a; a++) { // compute point A float pa_i = peaks_a[a * 2] * H; @@ -72,3 +72,46 @@ void paf_score_graph_out_hw(float *score_graph, // MxM } } } + +void paf_score_graph_out_khw(float *score_graph, // KxMxM + const int *topology, // Kx4 + const float *paf, // 2KxHxW + const int *counts, // C + const float *peaks, // CxMx2 + const int K, const int C, const int H, const int W, + const int M, const int num_integral_samples) { + for (int k = 0; k < K; k++) { + float *score_graph_k = &score_graph[k * M * M]; + const int *tk = &topology[k * 4]; + const int paf_i_idx = tk[0]; + const int paf_j_idx = tk[1]; + const int cmap_a_idx = tk[2]; + const int cmap_b_idx = tk[3]; + const float *paf_i = &paf[paf_i_idx * H * W]; + const float *paf_j = &paf[paf_j_idx * H * W]; + + const int counts_a = counts[cmap_a_idx]; + const int counts_b = counts[cmap_b_idx]; + const float *peaks_a = &peaks[cmap_a_idx * M * 2]; + const float *peaks_b = &peaks[cmap_b_idx * M * 2]; + + paf_score_graph_out_hw(score_graph_k, paf_i, paf_j, counts_a, counts_b, + peaks_a, peaks_b, H, W, M, num_integral_samples); + } +} + +void paf_score_graph_out_nkhw(float *score_graph, // NxKxMxM + const int *topology, // Kx4 + const float *paf, // Nx2KxHxW + const int *counts, // NxC + const float *peaks, // NxCxMx2 + const int N, const int K, const int C, + const int H, const int W, const int M, + const int num_integral_samples) { + for (int n = 0; n < 
N; n++) { + paf_score_graph_out_khw(&score_graph[n * K * M * M], topology, + &paf[n * 2 * K * H * W], &counts[n * C], + &peaks[n * C * M * 2], K, C, H, W, M, + num_integral_samples); + } +} diff --git a/trt_pose/plugins/paf_score_graph.hpp b/trt_pose/plugins/paf_score_graph.hpp index c4f1f5d..dda9e61 100644 --- a/trt_pose/plugins/paf_score_graph.hpp +++ b/trt_pose/plugins/paf_score_graph.hpp @@ -1,10 +1,27 @@ #pragma once void paf_score_graph_out_hw(float *score_graph, // MxM - const float *paf_i, // HxW - const float *paf_j, // HxW - const int counts_a, const int counts_b, - const float *peaks_a, // Mx2 - const float *peaks_b, // Mx2 - const int H, const int W, const int M, - const int num_integral_samples); + const float *paf_i, // HxW + const float *paf_j, // HxW + const int counts_a, const int counts_b, + const float *peaks_a, // Mx2 + const float *peaks_b, // Mx2 + const int H, const int W, const int M, + const int num_integral_samples); + +void paf_score_graph_out_khw(float *score_graph, // KxMxM + const int *topology, // Kx4 + const float *paf, // 2KxHxW + const int *counts, // C + const float *peaks, // CxMx2 + const int K, const int C, const int H, const int W, + const int M, const int num_integral_samples); + +void paf_score_graph_out_nkhw(float *score_graph, // NxKxMxM + const int *topology, // Kx4 + const float *paf, // Nx2KxHxW + const int *counts, // NxC + const float *peaks, // NxCxMx2 + const int N, const int K, const int C, + const int H, const int W, const int M, + const int num_integral_samples); From 50b11c75671a678f4aabae5c21d783c1e703b20d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Feb 2020 18:22:55 -0800 Subject: [PATCH 08/16] added munkres cpp --- trt_pose/plugins/munkres.cpp | 359 +++++++++++++++------------------- trt_pose/plugins/munkres.hpp | 30 ++- trt_pose/plugins/test_all.cpp | 36 ++++ 3 files changed, 210 insertions(+), 215 deletions(-) diff --git a/trt_pose/plugins/munkres.cpp b/trt_pose/plugins/munkres.cpp index 26e1582..65fa36b 100644 --- a/trt_pose/plugins/munkres.cpp +++ b/trt_pose/plugins/munkres.cpp @@ -1,56 +1,51 @@ -#include "utils/PairGraph.hpp" -#include "utils/CoverTable.hpp" #include "munkres.hpp" +#include "utils/CoverTable.hpp" +#include "utils/PairGraph.hpp" - -void subMinRow(torch::TensorAccessor cost_graph, int nrows, int ncols) -{ - for (int i = 0; i < nrows; i++) - { +void subMinRow(float *cost_graph, const int M, const int nrows, + const int ncols) { + for (int i = 0; i < nrows; i++) { // find min - float min = cost_graph[i][0]; + float min = cost_graph[i * M]; for (int j = 0; j < ncols; j++) { - float val = cost_graph[i][j]; - if (val < min) { - min = val; - } + float val = cost_graph[i * M + j]; + if (val < min) { + min = val; + } } - + // subtract min for (int j = 0; j < ncols; j++) { - cost_graph[i][j] -= min; + cost_graph[i * M + j] -= min; } } } -void subMinCol(torch::TensorAccessor cost_graph, int nrows, int ncols) -{ - for (int j = 0; j < ncols; j++) - { +void subMinCol(float *cost_graph, const int M, const int nrows, + const int ncols) { + for (int j = 0; j < ncols; j++) { // find min - float min = cost_graph[0][j]; + float min = cost_graph[j]; for (int i = 0; i < nrows; i++) { - float val = cost_graph[i][j]; - if (val < min) { - min = val; - } + float val = cost_graph[i * M + j]; + if (val < min) { + min = val; + } } - + // subtract min for (int i = 0; i < nrows; i++) { - cost_graph[i][j] -= min; + cost_graph[i * M + j] -= min; } } } -void munkresStep1(torch::TensorAccessor cost_graph, PairGraph &star_graph, int 
nrows, int ncols) -{ - for (int i = 0; i < nrows; i++) - { - for (int j = 0; j < ncols; j++) - { - if (!star_graph.isRowSet(i) && !star_graph.isColSet(j) && (cost_graph[i][j] == 0)) - { +void munkresStep1(const float *cost_graph, const int M, PairGraph &star_graph, + const int nrows, const int ncols) { + for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) { + if (!star_graph.isRowSet(i) && !star_graph.isColSet(j) && + (cost_graph[i * M + j] == 0)) { star_graph.set(i, j); } } @@ -58,14 +53,12 @@ void munkresStep1(torch::TensorAccessor cost_graph, PairGraph &star_gr } // returns 1 if we should exit -bool munkresStep2(const PairGraph &star_graph, CoverTable &cover_table) -{ - int k = star_graph.nrows < star_graph.ncols ? star_graph.nrows : star_graph.ncols; +bool munkresStep2(const PairGraph &star_graph, CoverTable &cover_table) { + int k = + star_graph.nrows < star_graph.ncols ? star_graph.nrows : star_graph.ncols; int count = 0; - for (int j = 0; j < star_graph.ncols; j++) - { - if (star_graph.isColSet(j)) - { + for (int j = 0; j < star_graph.ncols; j++) { + if (star_graph.isColSet(j)) { cover_table.coverCol(j); count++; } @@ -73,22 +66,18 @@ bool munkresStep2(const PairGraph &star_graph, CoverTable &cover_table) return count >= k; } -bool munkresStep3(torch::TensorAccessor cost_graph, const PairGraph &star_graph, PairGraph &prime_graph, CoverTable &cover_table, std::pair &p, int nrows, int ncols) -{ - for (int i = 0; i < nrows; i++) - { - for (int j = 0; j < ncols; j++) - { - if (cost_graph[i][j] == 0 && !cover_table.isCovered(i, j)) - { +bool munkresStep3(const float *cost_graph, const int M, + const PairGraph &star_graph, PairGraph &prime_graph, + CoverTable &cover_table, std::pair &p, + const int nrows, const int ncols) { + for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) { + if (cost_graph[i * M + j] == 0 && !cover_table.isCovered(i, j)) { prime_graph.set(i, j); - if (star_graph.isRowSet(i)) - { + if (star_graph.isRowSet(i)) { cover_table.coverRow(i); cover_table.uncoverCol(star_graph.colForRow(i)); - } - else - { + } else { p.first = i; p.second = j; return 1; @@ -97,212 +86,170 @@ bool munkresStep3(torch::TensorAccessor cost_graph, const PairGraph &s } } return 0; -}; +}; -void munkresStep4(PairGraph &star_graph, PairGraph &prime_graph, CoverTable &cover_table, std::pair p) -{ +void munkresStep4(PairGraph &star_graph, PairGraph &prime_graph, + CoverTable &cover_table, std::pair p) { // repeat until no star found in prime's column - while (star_graph.isColSet(p.second)) - { - // find and reset star in prime's column - std::pair s = { star_graph.rowForCol(p.second), p.second }; + while (star_graph.isColSet(p.second)) { + // find and reset star in prime's column + std::pair s = {star_graph.rowForCol(p.second), p.second}; star_graph.reset(s.first, s.second); // set this prime to a star star_graph.set(p.first, p.second); // repeat for prime in cleared star's row - p = { s.first, prime_graph.colForRow(s.first) }; + p = {s.first, prime_graph.colForRow(s.first)}; } star_graph.set(p.first, p.second); cover_table.clear(); prime_graph.clear(); } -void munkresStep5(torch::TensorAccessor cost_graph, const CoverTable &cover_table, int nrows, int ncols) -{ +void munkresStep5(float *cost_graph, const int M, const CoverTable &cover_table, + const int nrows, const int ncols) { bool valid = false; float min; - for (int i = 0; i < nrows; i++) - { - for (int j = 0; j < ncols; j++) - { - if (!cover_table.isCovered(i, j)) - { - if (!valid) - { - min = cost_graph[i][j]; + 
for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) { + if (!cover_table.isCovered(i, j)) { + if (!valid) { + min = cost_graph[i * M + j]; valid = true; - } - else if (cost_graph[i][j] < min) - { - min = cost_graph[i][j]; + } else if (cost_graph[i * M + j] < min) { + min = cost_graph[i * M + j]; } } } } - for (int i = 0; i < nrows; i++) - { - if (cover_table.isRowCovered(i)) - { + for (int i = 0; i < nrows; i++) { + if (cover_table.isRowCovered(i)) { for (int j = 0; j < ncols; j++) { - cost_graph[i][j] += min; + cost_graph[i * M + j] += min; } -// cost_graph.addToRow(i, min); + // cost_graph.addToRow(i, min); } } - for (int j = 0; j < ncols; j++) - { - if (!cover_table.isColCovered(j)) - { + for (int j = 0; j < ncols; j++) { + if (!cover_table.isColCovered(j)) { for (int i = 0; i < nrows; i++) { - cost_graph[i][j] -= min; + cost_graph[i * M + j] -= min; } -// cost_graph.addToCol(j, -min); + // cost_graph.addToCol(j, -min); } } } - -void _munkres(torch::TensorAccessor cost_graph, PairGraph &star_graph, int nrows, int ncols) -{ - PairGraph prime_graph(nrows, ncols); +void _munkres(float *cost_graph, const int M, PairGraph &star_graph, + const int nrows, const int ncols) { + PairGraph prime_graph(nrows, ncols); CoverTable cover_table(nrows, ncols); prime_graph.clear(); cover_table.clear(); star_graph.clear(); - + int step = 0; - if (ncols >= nrows) - { - subMinRow(cost_graph, nrows, ncols); + if (ncols >= nrows) { + subMinRow(cost_graph, M, nrows, ncols); } - if (ncols > nrows) - { + if (ncols > nrows) { step = 1; } std::pair p; bool done = false; - while (!done) - { - switch(step) - { - case 0: - subMinCol(cost_graph, nrows, ncols); - case 1: - munkresStep1(cost_graph, star_graph, nrows, ncols); - case 2: - if(munkresStep2(star_graph, cover_table)) - { - done = true; - break; - } - case 3: - if (!munkresStep3(cost_graph, star_graph, prime_graph, cover_table, p, nrows, ncols)) - { - step = 5; - break; - } - case 4: - munkresStep4(star_graph, prime_graph, cover_table, p); - step = 2; + while (!done) { + switch (step) { + case 0: + subMinCol(cost_graph, M, nrows, ncols); + case 1: + munkresStep1(cost_graph, M, star_graph, nrows, ncols); + case 2: + if (munkresStep2(star_graph, cover_table)) { + done = true; break; - case 5: - munkresStep5(cost_graph, cover_table, nrows, ncols); - step = 3; + } + case 3: + if (!munkresStep3(cost_graph, M, star_graph, prime_graph, cover_table, p, + nrows, ncols)) { + step = 5; break; + } + case 4: + munkresStep4(star_graph, prime_graph, cover_table, p); + step = 2; + break; + case 5: + munkresStep5(cost_graph, M, cover_table, nrows, ncols); + step = 3; + break; } } } - -void munkres_out(torch::Tensor cost_graph_out, torch::Tensor cost_graph, torch::Tensor topology, torch::Tensor counts) -{ - int N = counts.size(0); - int K = topology.size(0); - - cost_graph_out.copy_(cost_graph); - auto topology_a = topology.accessor(); - auto counts_a = counts.accessor(); - auto cost_graph_out_a = cost_graph_out.accessor(); - - for (int n = 0; n < N; n++) - { - for (int k = 0; k < K; k++) - { - int cmap_a_idx = topology_a[k][2]; - int cmap_b_idx = topology_a[k][3]; - int nrows = counts_a[n][cmap_a_idx]; - int ncols = counts_a[n][cmap_b_idx]; - auto star_graph = PairGraph(nrows, ncols); - _munkres(cost_graph_out_a[n][k], star_graph, nrows, ncols); - } - } +std::size_t assignment_out_workspace(const int M) { + return sizeof(float) * M * M; } -torch::Tensor munkres(torch::Tensor cost_graph, torch::Tensor topology, torch::Tensor counts) -{ - auto cost_graph_out = 
torch::empty_like(cost_graph); - munkres_out(cost_graph_out, cost_graph, topology, counts); - return cost_graph_out; -} +void assignment_out(int *connections, // 2xM + const float *score_graph, // MxM + const int count_a, const int count_b, const int M, + const float score_threshold, void *workspace) { + const int nrows = count_a; + const int ncols = count_b; + + // compute cost graph (negate score graph) + float *cost_graph = (float *)workspace; + for (int i = 0; i < count_a; i++) { + for (int j = 0; j < count_b; j++) { + const int idx = i * M + j; + cost_graph[idx] = -score_graph[idx]; + } + } + // run munkres algorithm + auto star_graph = PairGraph(nrows, ncols); + _munkres(cost_graph, M, star_graph, nrows, ncols); -// assignment NxKx2xM -void assignment_out(torch::Tensor connections, torch::Tensor score_graph, torch::Tensor topology, torch::Tensor counts, float score_threshold) -{ - int N = counts.size(0); - int K = topology.size(0); - - auto cost_graph = -score_graph; - auto score_graph_a = score_graph.accessor(); - auto connections_a = connections.accessor(); - auto topology_a = topology.accessor(); - auto counts_a = counts.accessor(); - auto cost_graph_out_a = cost_graph.accessor(); - - for (int n = 0; n < N; n++) - { - for (int k = 0; k < K; k++) - { - int cmap_a_idx = topology_a[k][2]; - int cmap_b_idx = topology_a[k][3]; - int nrows = counts_a[n][cmap_a_idx]; - int ncols = counts_a[n][cmap_b_idx]; - auto star_graph = PairGraph(nrows, ncols); - auto cost_graph_out_a_nk = cost_graph_out_a[n][k]; - _munkres(cost_graph_out_a_nk, star_graph, nrows, ncols); - - auto connections_a_nk = connections_a[n][k]; - auto score_graph_a_nk = score_graph_a[n][k]; - - for (int i = 0; i < nrows; i++) { - for (int j = 0; j < ncols; j++) { - if (star_graph.isPair(i, j) && score_graph_a_nk[i][j] > score_threshold) { - connections_a_nk[0][i] = j; - connections_a_nk[1][j] = i; - } - } - } - } + // fill output connections + for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) { + if (star_graph.isPair(i, j) && score_graph[i * M + j] > score_threshold) { + connections[0 * M + i] = j; + connections[1 * M + j] = i; + } } + } } -torch::Tensor assignment(torch::Tensor score_graph, torch::Tensor topology, torch::Tensor counts, float score_threshold) -{ - auto options = torch::TensorOptions() - .dtype(torch::kInt32) - .layout(torch::kStrided) - .device(torch::kCPU) - .requires_grad(false); - - int N = counts.size(0); - int K = topology.size(0); - int M = score_graph.size(2); - - auto connections = torch::full({N, K, 2, M}, -1, options); - assignment_out(connections, score_graph, topology, counts, score_threshold); - return connections; -} \ No newline at end of file +void assignement_out_k(int *connections, // Kx2xM + const int *topology, // Kx4 + const float *score_graph, // KxMxM + const int *counts, // C + const int K, const int M, const float score_threshold, + void *workspace) { + for (int k = 0; k < K; k++) { + const int *tk = &topology[k * 4]; + const int cmap_idx_a = tk[2]; + const int cmap_idx_b = tk[3]; + const int count_a = counts[cmap_idx_a]; + const int count_b = counts[cmap_idx_b]; + assignment_out(&connections[k * 2 * M], &score_graph[k * M * M], count_a, + count_b, M, score_threshold, workspace); + } +} + +void assignement_out_nk(int *connections, // NxKx2xM + const int *topology, // Kx4 + const float *score_graph, // NxKxMxM + const int *counts, // NxC + const int N, const int C, const int K, const int M, + const float score_threshold, void *workspace) { + for (int n = 0; n < N; 
n++) { + assignement_out_k(&connections[n * K * 2 * M], topology, + &score_graph[n * K * M * M], &counts[n * C], K, M, + score_threshold, workspace); + } +} diff --git a/trt_pose/plugins/munkres.hpp b/trt_pose/plugins/munkres.hpp index be3e74c..6fafc7f 100644 --- a/trt_pose/plugins/munkres.hpp +++ b/trt_pose/plugins/munkres.hpp @@ -1,12 +1,24 @@ -#include -#include -#include "utils/PairGraph.hpp" +#pragma once +#include -void _munkres(torch::TensorAccessor cost_graph, PairGraph &star_graph, int nrows, int ncols); -void munkres_out(torch::Tensor cost_graph_out, torch::Tensor cost_graph, torch::Tensor topology, torch::Tensor counts); -torch::Tensor munkres(torch::Tensor cost_graph, torch::Tensor topology, torch::Tensor counts); +std::size_t assignment_out_workspace(const int M); -// assignment NxKx2xM -void assignment_out(torch::Tensor connections, torch::Tensor score_graph, torch::Tensor topology, torch::Tensor counts, float score_threshold); -torch::Tensor assignment(torch::Tensor score_graph, torch::Tensor topology, torch::Tensor counts, float score_threshold); \ No newline at end of file +void assignment_out(int *connections, // 2xM + const float *score_graph, // MxM + const int count_a, const int count_b, const int M, + const float score_threshold, void *workspace); + +void assignement_out_k(int *connections, // Kx2xM + const int *topology, // Kx4 + const float *score_graph, // KxMxM + const int *counts, // C + const int K, const int M, const float score_threshold, + void *workspace); + +void assignement_out_nk(int *connections, // NxKx2xM + const int *topology, // Kx4 + const float *score_graph, // NxKxMxM + const int *counts, // NxC + const int N, const int C, const int K, const int M, + const float score_threshold, void *workspace); diff --git a/trt_pose/plugins/test_all.cpp b/trt_pose/plugins/test_all.cpp index a68194e..8e6b018 100644 --- a/trt_pose/plugins/test_all.cpp +++ b/trt_pose/plugins/test_all.cpp @@ -2,6 +2,7 @@ #include "paf_score_graph.hpp" #include "find_peaks.hpp" #include "refine_peaks.hpp" +#include "munkres.hpp" #define ABS(x) ((x) > 0 ? 
(x) : (-x))
 
@@ -122,10 +123,45 @@ void test_paf_score_graph_hw()
   );
 }
 
+void test_assignment_out()
+{
+    const int M = 4;
+    const int count_a = 3;
+    const int count_b = 3;
+    const float score_threshold = 0.3;
+
+    std::size_t workspace_size = assignment_out_workspace(M);
+    void *workspace = (void *) malloc(workspace_size);
+
+    int connections[2 * M];
+    const float score_graph[M * M] = {
+        1., 3., 0., 0.,
+        1., 2., 1., 0.,
+        4., 3., 4., 0.,
+        0., 0., 0., 0.,
+    };
+
+    assignment_out(connections, score_graph, count_a, count_b, M, score_threshold, workspace);
+
+    if (connections[0] != 1) {
+        throw std::runtime_error("connections[0] should be 1.");
+    }
+    if (connections[1] != 0) {
+        throw std::runtime_error("connections[1] should be 0.");
+    }
+    if (connections[2] != 2) {
+        throw std::runtime_error("connections[2] should be 2.");
+    }
+
+    free(workspace);
+}
+
+
 int main()
 {
     test_find_peaks_out_hw();
     test_refined_peaks_out_hw();
     test_paf_score_graph_hw();
+    test_assignment_out();
     return 0;
 }
From 908154c4bdf07274c22c2e75635347a8dc4363fa Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Sat, 29 Feb 2020 18:26:38 -0800
Subject: [PATCH 09/16] todo

---
 trt_pose/plugins/README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/trt_pose/plugins/README.md b/trt_pose/plugins/README.md
index b7386fa..2264416 100644
--- a/trt_pose/plugins/README.md
+++ b/trt_pose/plugins/README.md
@@ -1,3 +1,16 @@
+## TODO
+
+- [x] find_peaks plain cpp
+- [x] find_peaks torch binding
+- [x] refine_peaks plain cpp
+- [x] refine_peaks torch binding
+- [x] paf_score_graph plain cpp
+- [ ] paf_score_graph torch binding
+- [x] munkres plain cpp
+- [ ] munkres torch binding
+- [ ] connect parts plain cpp
+- [ ] connect parts torch binding
+
 ## Terminology
 
 * ``N`` - int - Batch size
@@ -71,4 +84,4 @@ cmap = generate_cmap(peak_counts, normalized_peaks, height=46, width=46, stdev=1
 
 ```python
 paf = generate_paf(connections, topology, peak_counts, normalized_peaks, height=46, width=46, stdev=1)
-```
\ No newline at end of file
+```
From 3de7916b306a4e4fc8e9fc859d698e3bd18a1f41 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Sun, 1 Mar 2020 12:08:54 -0800
Subject: [PATCH 10/16] added torch binding for paf_score_graph

---
 trt_pose/plugins/README.md   |  2 +-
 trt_pose/plugins/plugins.cpp | 46 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/trt_pose/plugins/README.md b/trt_pose/plugins/README.md
index 2264416..c5cfaf1 100644
--- a/trt_pose/plugins/README.md
+++ b/trt_pose/plugins/README.md
@@ -5,7 +5,7 @@
 - [x] refine_peaks plain cpp
 - [x] refine_peaks torch binding
 - [x] paf_score_graph plain cpp
-- [ ] paf_score_graph torch binding
+- [x] paf_score_graph torch binding
 - [x] munkres plain cpp
 - [ ] munkres torch binding
 - [ ] connect parts plain cpp
 - [ ] connect parts torch binding
diff --git a/trt_pose/plugins/plugins.cpp b/trt_pose/plugins/plugins.cpp
index ae3de67..40cc7b5 100644
--- a/trt_pose/plugins/plugins.cpp
+++ b/trt_pose/plugins/plugins.cpp
@@ -82,17 +82,53 @@ torch::Tensor refine_peaks_torch(torch::Tensor counts, torch::Tensor peaks,
   return refined_peaks;
 }
 
+void paf_score_graph_out_torch(torch::Tensor score_graph, torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, const int num_integral_samples)
+{
+  const int N = paf.size(0);
+  const int K = topology.size(0);
+  const int C = peaks.size(1);
+  const int H = paf.size(2);
+  const int W = paf.size(3);
+  const int M = score_graph.size(3);
+
+  paf_score_graph_out_nkhw(
+    (float*) 
score_graph.data_ptr(), + (const int*) topology.data_ptr(), + (const float*) paf.data_ptr(), + (const int*) counts.data_ptr(), + (const float *) peaks.data_ptr(), + N, K, C, H, W, M, + num_integral_samples + ); +} + +torch::Tensor paf_score_graph_torch(torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, const int num_integral_samples) +{ + auto options = torch::TensorOptions() + .dtype(torch::kFloat32) + .layout(torch::kStrided) + .device(torch::kCPU) + .requires_grad(false); + const int N = peaks.size(0); + const int K = topology.size(0); + const int M = peaks.size(2); + + torch::Tensor score_graph = torch::zeros({N, K, M, M}, options); + paf_score_graph_out_torch(score_graph, paf, topology, counts, peaks, num_integral_samples); + return score_graph; +} + PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("find_peaks", &find_peaks_torch, "find_peaks"); m.def("find_peaks_out", &find_peaks_out_torch, "find_peaks_out"); - m.def("paf_score_graph", &paf_score_graph, "paf_score_graph"); - m.def("paf_score_graph_out", &paf_score_graph_out, "paf_score_graph_out"); + m.def("paf_score_graph", &paf_score_graph_torch, "paf_score_graph"); + m.def("paf_score_graph_out", &paf_score_graph_out_torch, "paf_score_graph_out"); m.def("refine_peaks", &refine_peaks_torch, "refine_peaks"); m.def("refine_peaks_out", &refine_peaks_out_torch, "refine_peaks_out"); - m.def("munkres", &munkres, "munkres"); + //m.def("munkres", &munkres, "munkres"); m.def("connect_parts", &connect_parts, "connect_parts"); - m.def("assignment", &assignment, "assignment"); - m.def("assignment_out", &assignment_out, "assignment_out"); + //m.def("assignment", &assignment, "assignment"); + //m.def("assignment_out", &assignment_out, "assignment_out"); m.def("generate_cmap", &generate_cmap, "generate_cmap"); m.def("generate_paf", &generate_paf, "generate_paf"); } From 364ff0bee62138f75c22ebd1ea1b11028f7c8eb3 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 1 Mar 2020 12:27:41 -0800 Subject: [PATCH 11/16] munkres torch binding --- trt_pose/plugins/README.md | 2 +- trt_pose/plugins/munkres.cpp | 30 ++++++------ trt_pose/plugins/munkres.hpp | 8 ++-- trt_pose/plugins/plugins.cpp | 92 +++++++++++++++++++++++++----------- 4 files changed, 84 insertions(+), 48 deletions(-) diff --git a/trt_pose/plugins/README.md b/trt_pose/plugins/README.md index c5cfaf1..840cc68 100644 --- a/trt_pose/plugins/README.md +++ b/trt_pose/plugins/README.md @@ -7,7 +7,7 @@ - [x] paf_score_graph plain cpp - [x] paf_score_graph torch binding - [x] munkres plain cpp -- [ ] munkres torch binding +- [x] munkres torch binding - [ ] connect parts plain cpp - [ ] connect parts torch binding diff --git a/trt_pose/plugins/munkres.cpp b/trt_pose/plugins/munkres.cpp index 65fa36b..6923956 100644 --- a/trt_pose/plugins/munkres.cpp +++ b/trt_pose/plugins/munkres.cpp @@ -224,12 +224,12 @@ void assignment_out(int *connections, // 2xM } } -void assignement_out_k(int *connections, // Kx2xM - const int *topology, // Kx4 - const float *score_graph, // KxMxM - const int *counts, // C - const int K, const int M, const float score_threshold, - void *workspace) { +void assignment_out_k(int *connections, // Kx2xM + const float *score_graph, // KxMxM + const int *topology, // Kx4 + const int *counts, // C + const int K, const int M, const float score_threshold, + void *workspace) { for (int k = 0; k < K; k++) { const int *tk = &topology[k * 4]; const int cmap_idx_a = tk[2]; @@ -241,15 +241,15 @@ void assignement_out_k(int *connections, // Kx2xM } } -void 
assignement_out_nk(int *connections, // NxKx2xM - const int *topology, // Kx4 - const float *score_graph, // NxKxMxM - const int *counts, // NxC - const int N, const int C, const int K, const int M, - const float score_threshold, void *workspace) { +void assignment_out_nk(int *connections, // NxKx2xM + const float *score_graph, // NxKxMxM + const int *topology, // Kx4 + const int *counts, // NxC + const int N, const int C, const int K, const int M, + const float score_threshold, void *workspace) { for (int n = 0; n < N; n++) { - assignement_out_k(&connections[n * K * 2 * M], topology, - &score_graph[n * K * M * M], &counts[n * C], K, M, - score_threshold, workspace); + assignment_out_k(&connections[n * K * 2 * M], &score_graph[n * K * M * M], + topology, &counts[n * C], K, M, score_threshold, + workspace); } } diff --git a/trt_pose/plugins/munkres.hpp b/trt_pose/plugins/munkres.hpp index 6fafc7f..3cfbbb4 100644 --- a/trt_pose/plugins/munkres.hpp +++ b/trt_pose/plugins/munkres.hpp @@ -9,16 +9,16 @@ void assignment_out(int *connections, // 2xM const int count_a, const int count_b, const int M, const float score_threshold, void *workspace); -void assignement_out_k(int *connections, // Kx2xM - const int *topology, // Kx4 +void assignment_out_k(int *connections, // Kx2xM const float *score_graph, // KxMxM + const int *topology, // Kx4 const int *counts, // C const int K, const int M, const float score_threshold, void *workspace); -void assignement_out_nk(int *connections, // NxKx2xM - const int *topology, // Kx4 +void assignment_out_nk(int *connections, // NxKx2xM const float *score_graph, // NxKxMxM + const int *topology, // Kx4 const int *counts, // NxC const int N, const int C, const int K, const int M, const float score_threshold, void *workspace); diff --git a/trt_pose/plugins/plugins.cpp b/trt_pose/plugins/plugins.cpp index 40cc7b5..e9a19bf 100644 --- a/trt_pose/plugins/plugins.cpp +++ b/trt_pose/plugins/plugins.cpp @@ -82,8 +82,10 @@ torch::Tensor refine_peaks_torch(torch::Tensor counts, torch::Tensor peaks, return refined_peaks; } -void paf_score_graph_out_torch(torch::Tensor score_graph, torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, const int num_integral_samples) -{ +void paf_score_graph_out_torch(torch::Tensor score_graph, torch::Tensor paf, + torch::Tensor topology, torch::Tensor counts, + torch::Tensor peaks, + const int num_integral_samples) { const int N = paf.size(0); const int K = topology.size(0); const int C = peaks.size(1); @@ -92,43 +94,77 @@ void paf_score_graph_out_torch(torch::Tensor score_graph, torch::Tensor paf, tor const int M = score_graph.size(3); paf_score_graph_out_nkhw( - (float*) score_graph.data_ptr(), - (const int*) topology.data_ptr(), - (const float*) paf.data_ptr(), - (const int*) counts.data_ptr(), - (const float *) peaks.data_ptr(), - N, K, C, H, W, M, - num_integral_samples - ); + (float *)score_graph.data_ptr(), (const int *)topology.data_ptr(), + (const float *)paf.data_ptr(), (const int *)counts.data_ptr(), + (const float *)peaks.data_ptr(), N, K, C, H, W, M, num_integral_samples); } -torch::Tensor paf_score_graph_torch(torch::Tensor paf, torch::Tensor topology, torch::Tensor counts, torch::Tensor peaks, const int num_integral_samples) -{ - auto options = torch::TensorOptions() - .dtype(torch::kFloat32) - .layout(torch::kStrided) - .device(torch::kCPU) - .requires_grad(false); - const int N = peaks.size(0); - const int K = topology.size(0); - const int M = peaks.size(2); - - torch::Tensor score_graph = 
torch::zeros({N, K, M, M}, options); - paf_score_graph_out_torch(score_graph, paf, topology, counts, peaks, num_integral_samples); - return score_graph; +torch::Tensor paf_score_graph_torch(torch::Tensor paf, torch::Tensor topology, + torch::Tensor counts, torch::Tensor peaks, + const int num_integral_samples) { + auto options = torch::TensorOptions() + .dtype(torch::kFloat32) + .layout(torch::kStrided) + .device(torch::kCPU) + .requires_grad(false); + const int N = peaks.size(0); + const int K = topology.size(0); + const int M = peaks.size(2); + + torch::Tensor score_graph = torch::zeros({N, K, M, M}, options); + paf_score_graph_out_torch(score_graph, paf, topology, counts, peaks, + num_integral_samples); + return score_graph; +} + +void assignment_out_torch(torch::Tensor connections, torch::Tensor score_graph, + torch::Tensor topology, torch::Tensor counts, + const float score_threshold) { + const int N = counts.size(0); + const int C = counts.size(1); + const int K = topology.size(0); + const int M = score_graph.size(2); + void *workspace = (void *)malloc(assignment_out_workspace(M)); + + assignment_out_nk( + (int *)connections.data_ptr(), (const float *)score_graph.data_ptr(), + (const int *)topology.data_ptr(), (const int *)counts.data_ptr(), N, C, K, M, + score_threshold, workspace); + + free(workspace); +} + +torch::Tensor assignment_torch(torch::Tensor score_graph, + torch::Tensor topology, torch::Tensor counts, + float score_threshold) { + auto options = torch::TensorOptions() + .dtype(torch::kInt32) + .layout(torch::kStrided) + .device(torch::kCPU) + .requires_grad(false); + + int N = counts.size(0); + int K = topology.size(0); + int M = score_graph.size(2); + + auto connections = torch::full({N, K, 2, M}, -1, options); + assignment_out_torch(connections, score_graph, topology, counts, + score_threshold); + return connections; } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("find_peaks", &find_peaks_torch, "find_peaks"); m.def("find_peaks_out", &find_peaks_out_torch, "find_peaks_out"); m.def("paf_score_graph", &paf_score_graph_torch, "paf_score_graph"); - m.def("paf_score_graph_out", &paf_score_graph_out_torch, "paf_score_graph_out"); + m.def("paf_score_graph_out", &paf_score_graph_out_torch, + "paf_score_graph_out"); m.def("refine_peaks", &refine_peaks_torch, "refine_peaks"); m.def("refine_peaks_out", &refine_peaks_out_torch, "refine_peaks_out"); - //m.def("munkres", &munkres, "munkres"); + // m.def("munkres", &munkres, "munkres"); m.def("connect_parts", &connect_parts, "connect_parts"); - //m.def("assignment", &assignment, "assignment"); - //m.def("assignment_out", &assignment_out, "assignment_out"); + m.def("assignment", &assignment_torch, "assignment"); + m.def("assignment_out", &assignment_out_torch, "assignment_out"); m.def("generate_cmap", &generate_cmap, "generate_cmap"); m.def("generate_paf", &generate_paf, "generate_paf"); } From 18c008ff9d2b8ac356912a37e8c52a28d3d79f54 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 1 Mar 2020 12:29:42 -0800 Subject: [PATCH 12/16] removed ipynb checkpt --- .ipynb_checkpoints/README-checkpoint.md | 72 ------------------------- 1 file changed, 72 deletions(-) delete mode 100644 .ipynb_checkpoints/README-checkpoint.md diff --git a/.ipynb_checkpoints/README-checkpoint.md b/.ipynb_checkpoints/README-checkpoint.md deleted file mode 100644 index 2b172e2..0000000 --- a/.ipynb_checkpoints/README-checkpoint.md +++ /dev/null @@ -1,72 +0,0 @@ -# TensorRT Pose Estimation - -This project features multi-instance pose estimation accelerated by 
NVIDIA TensorRT. It is ideal for applications where low latency is necessary. It includes - -- Training scripts to train on any keypoint task data in MSCOCO format - -- A collection of models that may be easily optimized with TensorRT using [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt) - -This project can be used easily for the task of human pose estimation, or extended for something new. - -If you run into any issues please [let us know](../../issues). - -## Tasks - -### Human pose estimation - - - -This task involves detecting human body pose using models trained on the MSCOCO dataset. - -#### Models - -Below are models pre-trained on the MSCOCO dataset. The throughput in FPS is shown for each platform - -| Model | Jetson Nano | Jetson Xavier | Weights | -|-------|-------------|---------------|---------| -| resnet18_baseline_att_224x224_A | 22 | 251 | [download (81MB)](https://drive.google.com/open?id=1XYDdCUdiF2xxx4rznmLb62SdOUZuoNbd) | -| densenet121_baseline_att_256x256_B | 12 | 101 | [download (84MB)](https://drive.google.com/open?id=13FkJkx7evQ1WwP54UmdiDXWyFMY1OxDU) | - - -#### Live demo - -To run the live Jupyter Notebook demo on real-time camera input, follow these steps - -1. Place the downloaded weights in the [tasks/human_pose](tasks/human_pose) directory - -2. Open and follow the [live_demo.ipynb](tasks/human_pose/live_demo.ipynb) notebook - - > You may need to modify the notebook, depending on which model you use - -## Setup - -To install trt_pose, call this command - -> We assume you have already installed PyTorch, torchvision, and TensorRT - -```bash -sudo pip3 install tqdm cython pycocotools -sudo apt-get install python3-matplotlib -git clone https://github.com/NVIDIA-AI-IOT/trt_pose -cd trt_pose -sudo python3 setup.py install -``` - -## See also - -- [torch2trt](http://github.com/NVIDIA-AI-IOT/torch2trt) - An easy to use PyTorch to TensorRT converter - -- [JetBot](http://github.com/NVIDIA-AI-IOT/jetbot) - An educational AI robot based on NVIDIA Jetson Nano -- [JetRacer](http://github.com/NVIDIA-AI-IOT/jetracer) - An educational AI racecar using NVIDIA Jetson Nano -- [JetCam](http://github.com/NVIDIA-AI-IOT/jetcam) - An easy to use Python camera interface for NVIDIA Jetson - -## References - -Cao, Zhe, et al. "Realtime multi-person 2d pose estimation using part affinity fields." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017. - -Xiao, Bin, Haiping Wu, and Yichen Wei. "Simple baselines for human pose estimation and tracking." Proceedings of the European Conference on Computer Vision (ECCV). 2018. 
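For reference, a minimal sketch of how the batch-level `assignment_out_nk` entry point introduced in the munkres refactor above might be driven directly from plain C++. The buffer layouts follow the comments in `munkres.hpp` (connections NxKx2xM, score_graph NxKxMxM, topology Kx4, counts NxC); the concrete sizes, topology row, and score values below are illustrative placeholders, not taken from the patches:

```cpp
// Illustrative only: exercises assignment_out_nk on raw CPU buffers.
#include <cstdlib>
#include <vector>
#include "munkres.hpp"

int main() {
  const int N = 1, C = 2, K = 1, M = 4;   // one image, two part types, one link
  const float score_threshold = 0.3f;

  // topology row k holds 4 ints; the assignment stage only reads entries
  // 2 and 3 (the part-type indices a and b).
  std::vector<int> topology = {0, 1, 0, 1};

  std::vector<int> counts = {3, 3};        // detected peaks per part type (NxC)
  std::vector<float> score_graph(N * K * M * M, 0.0f);
  score_graph[0 * M + 1] = 3.0f;           // score for pairing type-0 peak 0 with type-1 peak 1

  std::vector<int> connections(N * K * 2 * M, -1);   // -1 = unmatched, as in the torch binding
  void *workspace = std::malloc(assignment_out_workspace(M));

  assignment_out_nk(connections.data(), score_graph.data(), topology.data(),
                    counts.data(), N, C, K, M, score_threshold, workspace);

  // connections[k][0][i] holds the index of the type-b peak matched to
  // type-a peak i (or -1), mirroring what the old torch version stored.
  std::free(workspace);
  return 0;
}
```
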
From d3e2404534f2aa5c26af47c29d89625aac7fe868 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 1 Mar 2020 13:26:34 -0800 Subject: [PATCH 13/16] added connect_parts refactor --- trt_pose/plugins/README.md | 5 +- trt_pose/plugins/connect_parts.cpp | 193 +++++++++++++-------------- trt_pose/plugins/connect_parts.hpp | 23 +++- trt_pose/plugins/paf_score_graph.cpp | 4 +- trt_pose/plugins/plugins.cpp | 40 +++++- 5 files changed, 151 insertions(+), 114 deletions(-) diff --git a/trt_pose/plugins/README.md b/trt_pose/plugins/README.md index 840cc68..9fe6690 100644 --- a/trt_pose/plugins/README.md +++ b/trt_pose/plugins/README.md @@ -8,8 +8,9 @@ - [x] paf_score_graph torch binding - [x] munkres plain cpp - [x] munkres torch binding -- [ ] connect parts plain cpp -- [ ] connect parts torch binding +- [x] connect parts plain cpp +- [x] connect parts torch binding +- [ ] test full refactored pipeline ## Terminology diff --git a/trt_pose/plugins/connect_parts.cpp b/trt_pose/plugins/connect_parts.cpp index da2b0f9..4d8bda7 100644 --- a/trt_pose/plugins/connect_parts.cpp +++ b/trt_pose/plugins/connect_parts.cpp @@ -1,112 +1,99 @@ #include "connect_parts.hpp" +#include +std::size_t connect_parts_out_workspace(const int C, const int M) { + return sizeof(int) * C * M; +} + +void connect_parts_out(int *object_counts, // 1 + int *objects, // PxC + const int *connections, // Kx2xM + const int *topology, // Kx4 + const int *counts, // C + const int K, const int C, const int M, const int P, + void *workspace) { + + // initialize objects + for (int i = 0; i < C * M; i++) { + objects[i] = -1; + } + + // initialize visited + std::memset(workspace, 0, connect_parts_out_workspace(C, M)); + int *visited = (int *)workspace; + + int num_objects = 0; + + for (int c = 0; c < C; c++) { + if (num_objects >= P) { + break; + } + + const int count = counts[c]; + + for (int i = 0; i < count; i++) { + if (num_objects >= P) { + break; + } + + std::queue> q; + bool new_object = false; + q.push({c, i}); -void connect_parts_out(torch::Tensor object_counts, torch::Tensor objects, torch::Tensor connections, torch::Tensor topology, torch::Tensor counts, int max_count) -{ - auto options = torch::TensorOptions() - .dtype(torch::kInt32) - .layout(torch::kStrided) - .device(torch::kCPU) - .requires_grad(false); - int N = counts.size(0); - int K = topology.size(0); - int C = counts.size(1); - int M = connections.size(3); - - auto visited = torch::zeros({N, C, M}, options); - auto visited_a = visited.accessor(); - auto counts_a = counts.accessor(); - auto topology_a = topology.accessor(); - auto objects_a = objects.accessor(); - auto object_counts_a = object_counts.accessor(); - auto connections_a = connections.accessor(); - - for (int n = 0; n < N; n++) - { - int num_objects = 0; - for (int c = 0; c < C; c++) - { - if (num_objects >= max_count) { - break; + while (!q.empty()) { + auto node = q.front(); + q.pop(); + int c_n = node.first; + int i_n = node.second; + + if (visited[c_n * M + i_n]) { + continue; + } + + visited[c_n * M + i_n] = 1; + new_object = true; + objects[num_objects * C + c_n] = i_n; + + for (int k = 0; k < K; k++) { + const int *tk = &topology[k * 4]; + const int c_a = tk[2]; + const int c_b = tk[3]; + const int *ck = &connections[k * 2 * M]; + + if (c_a == c_n) { + int i_b = ck[i_n]; + if (i_b >= 0) { + q.push({c_b, i_b}); } - - int count = counts_a[n][c]; - - for (int i = 0; i < count; i++) - { - if (num_objects >= max_count) { - break; - } - - std::queue> q; - bool new_object = false; - q.push({c, i}); - - while 
(!q.empty()) - { - auto node = q.front(); - q.pop(); - int c_n = node.first; - int i_n = node.second; - - if (visited_a[n][c_n][i_n]) { - continue; - } - - visited_a[n][c_n][i_n] = 1; - new_object = true; - objects_a[n][num_objects][c_n] = i_n; - - for (int k = 0; k < K; k++) - { - int c_a = topology_a[k][2]; - int c_b = topology_a[k][3]; - - if (c_a == c_n) - { - int i_b = connections_a[n][k][0][i_n]; - if (i_b >= 0) { - q.push({c_b, i_b}); - } - } - - if (c_b == c_n) - { - int i_a = connections_a[n][k][1][i_n]; - if (i_a >= 0) { - q.push({c_a, i_a}); - } - } - } - } - - if (new_object) - { - num_objects++; - } + } + + if (c_b == c_n) { + int i_a = ck[M + i_n]; + if (i_a >= 0) { + q.push({c_a, i_a}); } + } } - - object_counts_a[n] = num_objects; + } + + if (new_object) { + num_objects++; + } } + } + *object_counts = num_objects; } - -std::vector connect_parts(torch::Tensor connections, torch::Tensor topology, torch::Tensor counts, int max_count) -{ - auto options = torch::TensorOptions() - .dtype(torch::kInt32) - .layout(torch::kStrided) - .device(torch::kCPU) - .requires_grad(false); - - int N = counts.size(0); - int K = topology.size(0); - int C = counts.size(1); - int M = connections.size(3); - - auto objects = torch::full({N, max_count, C}, -1, options); - auto object_counts = torch::zeros({N}, options); - connect_parts_out(object_counts, objects, connections, topology, counts, max_count); - return {object_counts, objects}; -} \ No newline at end of file +void connect_parts_out_batch(int *object_counts, // N + int *objects, // NxPxC + const int *connections, // NxKx2xM + const int *topology, // Kx4 + const int *counts, // NxC + const int N, const int K, const int C, const int M, + const int P, void *workspace) { + for (int n = 0; n < N; n++) { + connect_parts_out(&object_counts[n], &objects[n * P * C], + &connections[n * K * 2 * M], topology, &counts[n * C], K, + C, M, P, workspace); + } +} diff --git a/trt_pose/plugins/connect_parts.hpp b/trt_pose/plugins/connect_parts.hpp index be2bd15..6294a1a 100644 --- a/trt_pose/plugins/connect_parts.hpp +++ b/trt_pose/plugins/connect_parts.hpp @@ -1,7 +1,20 @@ -#include -#include -#include +#pragma once +#include +std::size_t connect_parts_out_workspace(const int C, const int M); -void connect_parts_out(torch::Tensor object_counts, torch::Tensor objects, torch::Tensor connections, torch::Tensor topology, torch::Tensor counts, int max_count); -std::vector connect_parts(torch::Tensor connections, torch::Tensor topology, torch::Tensor counts, int max_count); \ No newline at end of file +void connect_parts_out(int *object_counts, // 1 + int *objects, // PxC + const int *connections, // Kx2xM + const int *topology, // Kx4 + const int *counts, // C + const int K, const int C, const int M, const int P, + void *workspace); + +void connect_parts_out_batch(int *object_counts, // N + int *objects, // NxPxC + const int *connections, // NxKx2xM + const int *topology, // Kx4 + const int *counts, // NxC + const int N, const int K, const int C, const int M, + const int P, void *workspace); diff --git a/trt_pose/plugins/paf_score_graph.cpp b/trt_pose/plugins/paf_score_graph.cpp index 5e1ead4..4037d16 100644 --- a/trt_pose/plugins/paf_score_graph.cpp +++ b/trt_pose/plugins/paf_score_graph.cpp @@ -31,12 +31,11 @@ void paf_score_graph_out_hw(float *score_graph, // MxM float uab_j = pab_j / pab_norm; float integral = 0.; - float progress = 0.; float increment = 1.f / num_integral_samples; for (int t = 0; t < num_integral_samples; t++) { // compute integral point T 
- float progress = (float)t / (float)num_integral_samples; + float progress = (float)t / ((float)num_integral_samples - 1); float pt_i = pa_i + progress * pab_i; float pt_j = pa_j + progress * pab_j; @@ -64,7 +63,6 @@ void paf_score_graph_out_hw(float *score_graph, // MxM // point float dot = pt_paf_i * uab_i + pt_paf_j * uab_j; integral += dot; - progress += increment; } integral /= num_integral_samples; diff --git a/trt_pose/plugins/plugins.cpp b/trt_pose/plugins/plugins.cpp index e9a19bf..3265ed6 100644 --- a/trt_pose/plugins/plugins.cpp +++ b/trt_pose/plugins/plugins.cpp @@ -153,6 +153,43 @@ torch::Tensor assignment_torch(torch::Tensor score_graph, return connections; } +void connect_parts_out_torch(torch::Tensor object_counts, torch::Tensor objects, torch::Tensor connections, torch::Tensor topology, torch::Tensor counts, int max_count) +{ + const int N = object_counts.size(0); + const int K = topology.size(0); + const int C = counts.size(1); + const int M = connections.size(3); + const int P = max_count; + void *workspace = malloc(connect_parts_out_workspace(C, M)); + connect_parts_out_batch( + (int *) object_counts.data_ptr(), + (int *) objects.data_ptr(), + (const int *) connections.data_ptr(), + (const int *) topology.data_ptr(), + (const int *) counts.data_ptr(), + N, K, C, M, P, workspace); + free(workspace); +} + +std::vector connect_parts_torch(torch::Tensor connections, torch::Tensor topology, torch::Tensor counts, int max_count) +{ + auto options = torch::TensorOptions() + .dtype(torch::kInt32) + .layout(torch::kStrided) + .device(torch::kCPU) + .requires_grad(false); + + int N = counts.size(0); + int K = topology.size(0); + int C = counts.size(1); + int M = connections.size(3); + + auto objects = torch::full({N, max_count, C}, -1, options); + auto object_counts = torch::zeros({N}, options); + connect_parts_out_torch(object_counts, objects, connections, topology, counts, max_count); + return {object_counts, objects}; +} + PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("find_peaks", &find_peaks_torch, "find_peaks"); m.def("find_peaks_out", &find_peaks_out_torch, "find_peaks_out"); @@ -162,7 +199,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("refine_peaks", &refine_peaks_torch, "refine_peaks"); m.def("refine_peaks_out", &refine_peaks_out_torch, "refine_peaks_out"); // m.def("munkres", &munkres, "munkres"); - m.def("connect_parts", &connect_parts, "connect_parts"); + m.def("connect_parts", &connect_parts_torch, "connect_parts"); + m.def("connect_parts_out", &connect_parts_out_torch, "connect_parts_out"); m.def("assignment", &assignment_torch, "assignment"); m.def("assignment_out", &assignment_out_torch, "assignment_out"); m.def("generate_cmap", &generate_cmap, "generate_cmap"); From dff0983c12be74642296e1f68c21cd86b7b6031e Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 1 Mar 2020 13:26:56 -0800 Subject: [PATCH 14/16] added cmake --- CMakeLists.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..464fad4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.6) +project(trt_pose) + +add_library(trt_pose SHARED + trt_pose/plugins/find_peaks.cpp + trt_pose/plugins/refine_peaks.cpp + trt_pose/plugins/paf_score_graph.cpp + trt_pose/plugins/munkres.cpp + trt_pose/plugins/connect_parts.cpp +) + +add_executable(trt_pose_test_all + trt_pose/plugins/test_all.cpp +) +target_link_libraries(trt_pose_test_all trt_pose) From 
ad8d806b006c2e664d3c3a5a42f75b8bdd96c10b Mon Sep 17 00:00:00 2001 From: John Date: Mon, 2 Mar 2020 10:08:59 -0800 Subject: [PATCH 15/16] Update README.md --- trt_pose/plugins/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trt_pose/plugins/README.md b/trt_pose/plugins/README.md index 9fe6690..f62f328 100644 --- a/trt_pose/plugins/README.md +++ b/trt_pose/plugins/README.md @@ -10,7 +10,7 @@ - [x] munkres torch binding - [x] connect parts plain cpp - [x] connect parts torch binding -- [ ] test full refactored pipeline +- [x] test full refactored pipeline ## Terminology From 6a1cace63ae6bc40f9b42ed3e44734227794d4d3 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 2 Mar 2020 10:10:41 -0800 Subject: [PATCH 16/16] incremented patch setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9c1da6e..926825c 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name='trt_pose', - version='0.0.0', + version='0.0.1', description='Pose detection accelerated by NVIDIA TensorRT', packages=find_packages(), ext_package='trt_pose',
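
To round out the checked-off "test full refactored pipeline" item, a minimal sketch (sizes, peak counts, and connection values are made up, not taken from the patches) of driving the plain C++ connect_parts stage added in patch 13; `P` is the per-image object cap that the torch binding calls `max_count`:

```cpp
// Illustrative only: groups assignment results into objects via the
// plain C++ connect_parts entry point (buffer layouts per connect_parts.hpp).
#include <cstdlib>
#include <vector>
#include "connect_parts.hpp"

int main() {
  const int N = 1, K = 1, C = 2, M = 4, P = 8;

  std::vector<int> topology = {0, 1, 0, 1};      // one link between part types 0 and 1
  std::vector<int> counts = {1, 1};              // one detected peak per part type
  std::vector<int> connections(N * K * 2 * M, -1);
  connections[0 * M + 0] = 0;                    // type-0 peak 0 -> type-1 peak 0
  connections[1 * M + 0] = 0;                    // and the reverse direction

  std::vector<int> object_counts(N, 0);
  std::vector<int> objects(N * P * C, -1);
  void *workspace = std::malloc(connect_parts_out_workspace(C, M));

  connect_parts_out_batch(object_counts.data(), objects.data(),
                          connections.data(), topology.data(), counts.data(),
                          N, K, C, M, P, workspace);

  // Expected here: object_counts[0] == 1, with objects[0 * C + 0] and
  // objects[0 * C + 1] holding the peak indices that form that object.
  std::free(workspace);
  return 0;
}
```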