diff --git a/CMakeLists.txt b/CMakeLists.txt index 898f62e97b..04e5a5ac69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,6 +75,10 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") set(CLConform_TARGET_ARCH x86_64) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*") set(CLConform_TARGET_ARCH x86) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") + set(CLConform_TARGET_ARCH RISCV64) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv") + set(CLConform_TARGET_ARCH RISCV) endif() if(NOT DEFINED CLConform_TARGET_ARCH) @@ -168,6 +172,7 @@ if(WIN32 AND ${CLConform_TARGET_ARCH} STREQUAL "x86") endif() list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES}) + if(ANDROID) list(APPEND CLConform_LIBRARIES m) endif() @@ -182,6 +187,29 @@ if(LINK_PTHREAD) list(APPEND CLConform_LIBRARIES pthread) endif() +if(ENABLE_ASAN) + set(CLConform_LIBRARIES "asan" ${CLConform_LIBRARIES}) + if("${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0.0") + list(APPEND SANITIZER_OPTIONS "-fsanitize=address") + else() + list(APPEND SANITIZER_OPTIONS "-fsanitize=address" "-fsanitize-recover=address") + endif() + list(APPEND SANITIZER_LIBS "asan") +endif() + +if(ENABLE_TSAN) + list(APPEND SANITIZER_OPTIONS "-fsanitize=thread") + list(APPEND SANITIZER_LIBS "tsan") +endif() + +if(ENABLE_ASAN OR ENABLE_TSAN) + set(CLConform_LIBRARIES "${SANITIZER_LIBS}" ${CLConform_LIBRARIES}) + if(SANITIZER_OPTIONS) + list(APPEND SANITIZER_OPTIONS "-fno-omit-frame-pointer") + add_compile_options(${SANITIZER_OPTIONS}) + endif() +endif() + if(APPLE) find_library(corefoundation CoreFoundation) find_library(iokit IOKit) @@ -189,6 +217,8 @@ if(APPLE) list(APPEND CLConform_LIBRARIES ${iokit}) endif(APPLE) +message(STATUS "CLConform_LIBRARIES: ${CLConform_LIBRARIES}") + include_directories(SYSTEM ${CL_INCLUDE_DIR}) include_directories(${CLConform_SOURCE_DIR}/test_common/harness ${CLConform_SOURCE_DIR}/test_common/gles diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index fb1291d610..ea7d19a37f 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -562,6 +562,10 @@ void ThreadPool_Init(void) // Hopefully your system returns logical cpus here, as does MacOS X gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF); } + + if (gThreadCount > 8) + gThreadCount = 8; + #else /* !_WIN32 */ // Hopefully your system returns logical cpus here, as does MacOS X gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF); diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 12aba0a94a..ba8d88d6cf 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -69,6 +69,8 @@ inline void ForceFTZ(FPU_mode_type *mode) __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); #elif defined(__mips__) fpa_bissr(FPA_CSR_FS); +#elif defined(__riscv) + #warning RISC-V platform does NOT support FTZ... #else #error ForceFTZ needs an implentation #endif @@ -97,6 +99,8 @@ inline void DisableFTZ(FPU_mode_type *mode) __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); #elif defined(__mips__) fpa_bicsr(FPA_CSR_FS); +#elif defined(__riscv) + #warning RISC-V platform does NOT support FTZ... #else #error DisableFTZ needs an implentation #endif @@ -117,6 +121,8 @@ inline void RestoreFPState(FPU_mode_type *mode) __asm__ volatile("msr fpcr, %0" ::"r"(*mode)); #elif defined(__mips__) // Mips runs by default with DAZ=1 FTZ=1 +#elif defined(__riscv) + #warning RISC-V platform does NOT support FTZ... #else #error RestoreFPState needs an implementation #endif diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index b2e443b783..bca8f700e0 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -224,6 +224,9 @@ void *FlushToZero(void) #elif defined(__mips__) fpa_bissr(FPA_CSR_FS); return NULL; +#elif defined(__riscv) + #warning RISC-V does NOT support FTZ... + return NULL; #else #error Unknown arch #endif @@ -254,6 +257,8 @@ void UnFlushToZero(void *p) _FPU_SETCW(flags); #elif defined(__mips__) fpa_bicsr(FPA_CSR_FS); +#elif defined(__riscv) + #warning RISC-V does NOT support FTZ... #else #error Unknown arch #endif diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index df54a35d71..beb7f95ff1 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -1374,6 +1374,8 @@ void PrintArch(void) vlog("ARCH:\tWindows\n"); #elif defined(__mips__) vlog("ARCH:\tmips\n"); +#elif defined(__riscv) + vlog("ARCH:\riscv/riscv64\n"); #else #error unknown arch #endif diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index 90b302b24c..caa6bd81ff 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -542,33 +542,38 @@ size_t get_param_size(const std::string& arg_type, cl_device_id device, } size_t ret(0); + /* "signed" and "unsigned" type names are valid (int is implied) */ + if (arg_type.find("signed") != std::string::npos) + { + ret = sizeof(cl_int); + } if (arg_type.find("char") != std::string::npos) { - ret += sizeof(cl_char); + ret = sizeof(cl_char); } if (arg_type.find("short") != std::string::npos) { - ret += sizeof(cl_short); + ret = sizeof(cl_short); } if (arg_type.find("half") != std::string::npos) { - ret += sizeof(cl_half); + ret = sizeof(cl_half); } if (arg_type.find("int") != std::string::npos) { - ret += sizeof(cl_int); + ret = sizeof(cl_int); } if (arg_type.find("long") != std::string::npos) { - ret += sizeof(cl_long); + ret = sizeof(cl_long); } if (arg_type.find("float") != std::string::npos) { - ret += sizeof(cl_float); + ret = sizeof(cl_float); } if (arg_type.find("double") != std::string::npos) { - ret += sizeof(cl_double); + ret = sizeof(cl_double); } if (arg_type.back() == '2') { diff --git a/test_conformance/basic/test_async_copy.cpp b/test_conformance/basic/test_async_copy.cpp index bb529bce0d..2eb2bf9055 100644 --- a/test_conformance/basic/test_async_copy.cpp +++ b/test_conformance/basic/test_async_copy.cpp @@ -150,7 +150,7 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, localWorkgroupSize = max_workgroup_size; size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; - size_t numberOfLocalWorkgroups = 1111; + size_t numberOfLocalWorkgroups = 57; size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize; size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp index 932e9b8c95..543c188e81 100644 --- a/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/basic/test_async_strided_copy.cpp @@ -147,7 +147,7 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu localWorkgroupSize = max_workgroup_size; size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; - size_t numberOfLocalWorkgroups = 579;//1111; + size_t numberOfLocalWorkgroups = 57; // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed // by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation. diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp index abe95af549..3d2150dfdd 100644 --- a/test_conformance/contractions/contractions.cpp +++ b/test_conformance/contractions/contractions.cpp @@ -50,7 +50,7 @@ #include #endif -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__x86_64__) && defined(__linux__)) #include #endif @@ -110,7 +110,7 @@ static int RunTest_Double( int testNumber ); #define nan( X ) strtod( "NAN", ( char ** ) NULL ) #endif -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__x86_64__) && defined(__linux__)) // defeat x87 on MSVC float sse_add(float x, float y) { @@ -687,7 +687,7 @@ test_status InitCL( cl_device_id device ) float q2 = f2[i]; feclearexcept(FE_OVERFLOW); -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__x86_64__) && defined(__linux__)) // VS2005 might use x87 for straight multiplies, and we can't // turn that off f3[i] = sse_mul(q, q2); @@ -740,7 +740,7 @@ test_status InitCL( cl_device_id device ) feclearexcept(FE_OVERFLOW); switch (j) { -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__x86_64__) && defined(__linux__)) // VS2005 might use x87 for straight add/sub, and we can't // turn that off case 0: @@ -831,7 +831,7 @@ test_status InitCL( cl_device_id device ) { double q = f[i]; double q2 = f2[i]; -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__x86_64__) && defined(__linux__)) // VS2005 might use x87 for straight multiplies, and we can't // turn that off f3[i] = sse_mul_sd(q, q2); @@ -862,7 +862,7 @@ test_status InitCL( cl_device_id device ) // calculate reference results for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ ) { -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__x86_64__) && defined(__linux__)) // VS2005 might use x87 for straight add/sub, and we can't // turn that off correct_double[0][i] = sse_add_sd(buf3_double[i],buf4_double[i]); diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index b0bd31d2fc..4824d3a489 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -130,7 +130,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; const char* kernelString = "__kernel void empty() {}"; - const size_t global_work_size = 4 * 16; + const size_t global_work_size = 256 * 1024; }; struct InfoMutableCommandBufferTest : BasicMutableCommandBufferTest diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp index 946fa995b3..abbe507333 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp @@ -153,9 +153,9 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest } size_t info_global_size = 0; - const size_t update_global_size = 3; - const size_t sizeToAllocate = global_work_size; - const size_t num_elements = sizeToAllocate / sizeof(cl_int); + const size_t update_global_size = 16 * 1024; + const size_t sizeToAllocate = global_work_size * 4; + const size_t num_elements = global_work_size; cl_mutable_command_khr command = nullptr; }; diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp index 12a982fa6a..28a396c0db 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -120,10 +120,11 @@ struct PropertiesArray : public InfoMutableCommandBufferTest virtual bool Skip() override { Version device_version = get_device_cl_version(device); + if (InfoMutableCommandBufferTest::Skip()) + return true; if ((device_version >= Version(3, 0)) || is_extension_available(device, "cl_khr_extended_versioning")) { - cl_version extension_version = get_extension_version( device, "cl_khr_command_buffer_mutable_dispatch"); @@ -134,7 +135,17 @@ struct PropertiesArray : public InfoMutableCommandBufferTest return true; } } - return InfoMutableCommandBufferTest::Skip(); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + cl_int error = clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr); + test_error(error, "clGetDeviceInfo failed"); + + if ((mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR) == 0) + return true; + + return false; } cl_int Run() override diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_groups.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_groups.cpp index ad20fbe3b2..912f77b20b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_groups.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_groups.cpp @@ -281,7 +281,9 @@ struct MutableDispatchWorkGroups : public BasicMutableCommandBufferTest static constexpr size_t test_global_work_size = 64; static constexpr size_t update_global_size = 16; const size_t local_work_size = 8; - const size_t sizeToAllocate = 64 * sizeof(cl_int); + const size_t sizeToAllocate = (test_global_work_size > global_work_size + ? test_global_work_size + : global_work_size) * sizeof(cl_int); }; int test_command_buffer_with_no_additional_work_groups(cl_device_id device, diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy.cpp index 541ab51690..11fd540139 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy.cpp @@ -82,7 +82,7 @@ struct CommandBufferCopyBaseTest : BasicCommandBufferTest static constexpr size_t region[3] = { img_width, img_height, 1 }; static constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 }; static constexpr size_t data_size = - img_width * img_height * num_channels * sizeof(uint8_t); + img_width * img_height * num_channels * sizeof(cl_uint); clMemWrapper image; clMemWrapper buffer; clMemWrapper in_mem; diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp index f9b9096c66..d2b14cc3ab 100644 --- a/test_conformance/images/clFillImage/test_fill_generic.cpp +++ b/test_conformance/images/clFillImage/test_fill_generic.cpp @@ -302,6 +302,7 @@ int test_fill_image_generic( cl_context context, cl_command_queue queue, image_d // Unmap the image. error = clEnqueueUnmapMemObject(queue, image, mapped, 0, NULL, NULL); + error = error | clFinish(queue); if (error != CL_SUCCESS) { log_error( "ERROR: Unable to unmap image after verify: %s\n", IGetErrorString( error ) ); diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index a9a6571970..3a167481bb 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -98,6 +98,8 @@ const float specialValues[] = { MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), + 0x1.20abd6p-14, + -0x1.6a24ep-12, -1000.f, -100.f, -4.0f, @@ -191,6 +193,7 @@ const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147, -1, -2, -3, -126, -127, -128, -0x02000001, + -131, -133, -0x04000001, -1465264071, -1488522147, }; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index c6a4b5d6b1..c45129d473 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -183,8 +183,8 @@ static int doTest(const char *name) { if (get_device_cl_version(gDevice) > Version(1, 2)) { - gTestCount++; - vlog("%3d: ", gTestCount); + int TestCount = __atomic_add_fetch(&gTestCount, 1, __ATOMIC_ACQ_REL); + vlog("%3d: ", TestCount); // Test with relaxed requirements here. if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata, true /* relaxed mode */)) @@ -207,13 +207,13 @@ static int doTest(const char *name) if (gTestFloat) { - gTestCount++; - vlog("%3d: ", gTestCount); + int TestCount = __atomic_add_fetch(&gTestCount, 1, __ATOMIC_ACQ_REL); + vlog("%3d: ", TestCount); // Don't test with relaxed requirements. if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata, false /* relaxed mode */)) { - gFailCount++; + __atomic_add_fetch(&gFailCount, 1, __ATOMIC_ACQ_REL); error++; if (gStopOnError) { @@ -226,13 +226,13 @@ static int doTest(const char *name) if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p) { - gTestCount++; - vlog("%3d: ", gTestCount); + int TestCount = __atomic_add_fetch(&gTestCount, 1, __ATOMIC_ACQ_REL); + vlog("%3d: ", TestCount); // Don't test with relaxed requirements. if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata, false /* relaxed mode*/)) { - gFailCount++; + __atomic_add_fetch(&gFailCount, 1, __ATOMIC_ACQ_REL); error++; if (gStopOnError) { diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index ab8ee797a8..b22f492a6f 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -29,7 +29,7 @@ #include extern MTdata gMTdata; -typedef std::bitset<128> bs128; +typedef std::bitset<1280> bs128; extern cl_half_rounding_mode g_rounding_mode; bs128 cl_uint4_to_bs128(cl_uint4 v); @@ -1514,7 +1514,8 @@ template struct subgroup_test // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple - if (local > test_params.local_workgroup_size) + if (local > test_params.local_workgroup_size + && global % test_params.local_workgroup_size == 0) local = test_params.local_workgroup_size;