CUDA 12.4、TensorRT 10.6:在 Python 环境下使用 TensorRT 推理,结果没有问题;但是在 C++ 环境下推理时,labels 和 scores 有值,boxes 却全是 0。推理代码如下:
(cudaMemcpyAsync(buffers[0], input, 1 * 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice, stream));
(cudaMemcpyAsync(buffers[1], input, 1 * 2 * sizeof(int64), cudaMemcpyHostToDevice, stream));
cudaStreamSynchronize(stream);
context->setTensorAddress(engine->getIOTensorName(0), buffers[0]);
context->setTensorAddress(engine->getIOTensorName(1), buffers[1]);
context->setTensorAddress(engine->getIOTensorName(2), buffers[2]);
context->setTensorAddress(engine->getIOTensorName(3), buffers[3]);
context->setTensorAddress(engine->getIOTensorName(4), buffers[4]);
//context->enqueueV3(stream);
context->executeV2(buffers);
(cudaMemcpyAsync(labels, buffers[2], 1 * 300 * sizeof(int64), cudaMemcpyDeviceToHost, stream));
(cudaMemcpyAsync(boxes, buffers[3], 1 * 300 * 4 * sizeof(float), cudaMemcpyDeviceToHost, stream));
(cudaMemcpyAsync(scores, buffers[4], 1 * 300 * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
推理后处理
// Walk the 300 candidate detections of batch item `b`, print every detection
// whose score is at least 0.4, and draw its box on img_resize.
for (int i = 0; i < 300; i++) {
    const int det = b * 300 + i;  // flat index of this detection
    const float score = scores[det];
    const int64 label = labels[det];
    if (score < 0.4f) {
        continue;
    }
    std::cout << " score " << score << std::endl;
    std::cout << " label " << label << std::endl;

    // Each detection owns 4 consecutive floats, so its box starts at det * 4.
    // Indexing with `i + k` instead of `i * 4 + k` would read floats that
    // belong to neighbouring detections.
    const float* box = &boxes[det * 4];
    const float x = box[0];
    const float y = box[1];
    const float x2 = box[2];
    const float y2 = box[3];
    std::cout << " x " << x << std::endl;
    std::cout << " y " << y << std::endl;
    std::cout << " x2 " << x2 << std::endl;
    std::cout << " y2 " << y2 << std::endl;

    // The four values are used as two opposite corners of the rectangle.
    cv::rectangle(img_resize, cv::Rect(cv::Point(int(x), int(y)), cv::Point(int(x2), int(y2))), cv::Scalar(0, 0, 255), 1);
}
结果打印
score 0.77002
label 0
x 0
y 0
x2 0
y2 0
score 0.491455
label 1
x 0
y 0
x2 0
y2 0
求解
CUDA 12.4、TensorRT 10.6:在 Python 环境下使用 TensorRT 推理,结果没有问题;但是在 C++ 环境下推理时,labels 和 scores 有值,boxes 却全是 0。推理代码如下:
(cudaMemcpyAsync(buffers[0], input, 1 * 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice, stream));
(cudaMemcpyAsync(buffers[1], input, 1 * 2 * sizeof(int64), cudaMemcpyHostToDevice, stream));
cudaStreamSynchronize(stream);
context->setTensorAddress(engine->getIOTensorName(0), buffers[0]);
context->setTensorAddress(engine->getIOTensorName(1), buffers[1]);
context->setTensorAddress(engine->getIOTensorName(2), buffers[2]);
context->setTensorAddress(engine->getIOTensorName(3), buffers[3]);
context->setTensorAddress(engine->getIOTensorName(4), buffers[4]);
//context->enqueueV3(stream);
context->executeV2(buffers);
(cudaMemcpyAsync(labels, buffers[2], 1 * 300 * sizeof(int64), cudaMemcpyDeviceToHost, stream));
(cudaMemcpyAsync(boxes, buffers[3], 1 * 300 * 4 * sizeof(float), cudaMemcpyDeviceToHost, stream));
(cudaMemcpyAsync(scores, buffers[4], 1 * 300 * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
推理后处理
// Walk the 300 candidate detections of batch item `b`, print every detection
// whose score is at least 0.4, and draw its box on img_resize.
for (int i = 0; i < 300; i++) {
    const int det = b * 300 + i;  // flat index of this detection
    const float score = scores[det];
    const int64 label = labels[det];
    if (score < 0.4f) {
        continue;
    }
    std::cout << " score " << score << std::endl;
    std::cout << " label " << label << std::endl;

    // Each detection owns 4 consecutive floats, so its box starts at det * 4.
    // Indexing with `i + k` instead of `i * 4 + k` would read floats that
    // belong to neighbouring detections.
    const float* box = &boxes[det * 4];
    const float x = box[0];
    const float y = box[1];
    const float x2 = box[2];
    const float y2 = box[3];
    std::cout << " x " << x << std::endl;
    std::cout << " y " << y << std::endl;
    std::cout << " x2 " << x2 << std::endl;
    std::cout << " y2 " << y2 << std::endl;

    // The four values are used as two opposite corners of the rectangle.
    cv::rectangle(img_resize, cv::Rect(cv::Point(int(x), int(y)), cv::Point(int(x2), int(y2))), cv::Scalar(0, 0, 255), 1);
}
结果打印
score 0.77002
label 0
x 0
y 0
x2 0
y2 0
score 0.491455
label 1
x 0
y 0
x2 0
y2 0
求解