Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(CLConform_TARGET_ARCH x86_64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
set(CLConform_TARGET_ARCH x86)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*")
set(CLConform_TARGET_ARCH RISCV)
endif()

if(NOT DEFINED CLConform_TARGET_ARCH)
Expand Down
11 changes: 7 additions & 4 deletions test_common/harness/fpcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type;
#elif defined(__PPC__)
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#elif defined(__riscv)
#define _FPU_MASK_NI 1
static FPU_mode_type fpu_control;
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
Expand All @@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode)
|| defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr();
_mm_setcsr(*oldMode | 0x8040);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
*oldMode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined(__arm__)
Expand Down Expand Up @@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode)
|| defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr();
_mm_setcsr(*oldMode & ~0x8040);
#elif defined(__PPC__)
*mode = fpu_control;
#elif defined(__PPC__) || defined(__riscv)
*oldMode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined(__arm__)
unsigned fpscr;
Expand Down Expand Up @@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64) || defined(__MINGW32__)
_mm_setcsr(*mode);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
fpu_control = *mode;
#elif defined(__arm__)
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
Expand Down
5 changes: 5 additions & 0 deletions test_common/harness/rounding_mode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ RoundingMode get_round(void)
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif

void *FlushToZero(void)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
Expand Down Expand Up @@ -231,6 +232,8 @@ void *FlushToZero(void)
#elif defined(__mips__)
fpa_bissr(FPA_CSR_FS);
return NULL;
#elif defined(__riscv)
return NULL;
#else
#error Unknown arch
#endif
Expand Down Expand Up @@ -266,6 +269,8 @@ void UnFlushToZero(void *p)
_FPU_SETCW(flags);
#elif defined(__mips__)
fpa_bicsr(FPA_CSR_FS);
#elif defined(__riscv)
return;
#else
#error Unknown arch
#endif
Expand Down
2 changes: 2 additions & 0 deletions test_common/harness/testHarness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1409,6 +1409,8 @@ void PrintArch(void)
vlog("ARCH:\tWindows\n");
#elif defined(__mips__)
vlog("ARCH:\tmips\n");
#elif defined(__riscv)
vlog("ARCH:\tRISC-V\n");
#else
#error unknown arch
#endif
Expand Down
18 changes: 10 additions & 8 deletions test_conformance/contractions/contractions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
}
#endif

#ifdef __PPC__
#if defined(__PPC__) || defined(__riscv)
float ppc_mul(float a, float b)
{
float p;
Expand Down Expand Up @@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
// turn that off
f3[i] = sse_mul(q, q2);
f4[i] = sse_mul(-q, q2);
#elif defined(__PPC__)
// None of the current generation PPC processors support HW
// FTZ, emulate it in sw.
#elif (defined(__PPC__) || defined(__riscv))
// RISC-V CPUs with the default 'f' fp32 extension do not support
// enabling/disabling FTZ mode; subnormals are always handled
// without FTZ. None of the current generation PPC processors
// support HW FTZ either, so emulate it in sw.
f3[i] = ppc_mul(q, q2);
f4[i] = ppc_mul(-q, q2);
#else
Expand Down Expand Up @@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
skipTest[j][i] = (bufSkip[i] ||
(gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));

#if defined(__PPC__)
// Since the current Power processors don't emulate flush to zero in HW,
// it must be emulated in SW instead.
#if defined(__PPC__) || defined(__riscv)
// Since the current Power processors don't support flush to
// zero in HW, it must be emulated in SW instead (the same applies
// to RISC-V CPUs with the 'f' extension).
if (gForceFTZ)
{
if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
Expand Down Expand Up @@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
}
}


double *f = (double*) buf1;
double *f2 = (double*) buf2;
double *f3 = (double*) buf3_double;
Expand Down
2 changes: 0 additions & 2 deletions test_conformance/conversions/basic_test_conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
uint64_t GetTime(void);

void WriteInputBufferComplete(void *);
void *FlushToZero(void);
void UnFlushToZero(void *);
}

struct CalcRefValsBase
Expand Down
10 changes: 6 additions & 4 deletions test_conformance/math_brute_force/reference_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,9 @@ double reference_add(double x, double y)
__m128 vb = _mm_set_ss((float)b);
va = _mm_add_ss(va, vb);
_mm_store_ss((float *)&a, va);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
// RISC-V CPUs with the default 'f' fp32 extension do not support any way to
// enable/disable FTZ mode; subnormals are always handled without flushing.
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
// denormals to zero. As such, the reference add with FTZ must be emulated in
// sw.
Expand All @@ -876,7 +878,7 @@ double reference_add(double x, double y)
} ub;
ub.d = b;
cl_uint mantA, mantB;
cl_ulong addendA, addendB, sum;
cl_ulong addendA, addendB;
int expA = extractf(a, &mantA);
int expB = extractf(b, &mantB);
cl_uint signA = ua.u & 0x80000000U;
Expand Down Expand Up @@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
__m128 vb = _mm_set_ss((float)b);
va = _mm_mul_ss(va, vb);
_mm_store_ss((float *)&a, va);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
// denormals to zero, and this branch is also compiled for RISC-V. As such,
// the reference multiply with FTZ must be emulated in sw.
Expand Down Expand Up @@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)

long double reference_rintl(long double x)
{
#if defined(__PPC__)
#if defined(__PPC__) || defined(__riscv)
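// On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
// mantissa can represent more than LDBL_MANT_DIG binary digits.
// NOTE(review): this branch is now also taken on RISC-V; confirm that the
// same long double rationale applies there.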
x = rintl(x);
Expand Down