Skip to content

Commit 4ee11fd

Browse files
authored
Use std::optional to fix perf issues in device, result, and combined storage (#2545)
* Use std::optional to fix perf issues in device, result, and combined storage caused by default construction of sycl::queue

  Signed-off-by: Matthew Michel <matthew.michel@intel.com>

* Add asserts to check queue is present and avoid checks in release builds

  Signed-off-by: Matthew Michel <matthew.michel@intel.com>

---------

Signed-off-by: Matthew Michel <matthew.michel@intel.com>
1 parent 15fcdcc commit 4ee11fd

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include <type_traits>
2323
#include <tuple>
2424
#include <algorithm>
25+
#include <optional>
26+
#include <cassert>
2527

2628
#include "../../iterator_impl.h"
2729

@@ -399,12 +401,13 @@ class __buffer_impl
399401

400402
struct __sycl_usm_free
401403
{
402-
sycl::queue __q;
404+
std::optional<sycl::queue> __q;
403405

404406
void
405407
operator()(void* __memory) const
406408
{
407-
sycl::free(__memory, __q);
409+
assert(__q.has_value());
410+
sycl::free(__memory, *__q);
408411
}
409412
};
410413

@@ -699,12 +702,12 @@ struct __result_and_scratch_storage : __result_and_scratch_storage_base
699702
else if (__supports_USM_device)
700703
{
701704
auto __q_proxy = std::get_deleter<__internal::__sycl_usm_free>(__scratch_buf);
702-
assert(__q_proxy != nullptr);
705+
assert(__q_proxy != nullptr && __q_proxy->__q.has_value());
703706
// Avoid default constructor for _T. Since _T is device copyable, copy construction
704707
// is equivalent to a bitwise copy and we may treat __space.__v as constructed after the memcpy.
705708
// There is no need to destroy it afterwards, as the destructor must have no effect.
706709
oneapi::dpl::__internal::__lazy_ctor_storage<_T> __space;
707-
__q_proxy->__q.memcpy(&__space.__v, __scratch_buf.get() + __scratch_n + _Idx, sizeof(_T)).wait();
710+
__q_proxy->__q->memcpy(&__space.__v, __scratch_buf.get() + __scratch_n + _Idx, sizeof(_T)).wait();
708711
return __space.__v;
709712
}
710713
else
@@ -796,8 +799,9 @@ struct __device_storage
796799
}
797800
else if (__usm_buf)
798801
{
799-
sycl::queue& __q = __usm_buf.get_deleter().__q;
800-
__q.memcpy(__dst, __usm_buf.get() + __offset, __n * sizeof(_T)).wait();
802+
auto& __q_proxy = __usm_buf.get_deleter();
803+
assert(__q_proxy.__q.has_value());
804+
__q_proxy.__q->memcpy(__dst, __usm_buf.get() + __offset, __n * sizeof(_T)).wait();
801805
}
802806
else
803807
{

0 commit comments

Comments
 (0)