Skip to content

Commit bcfa1f7

Browse files
authored
Added corrections to re-enable reciprocal test in math_brute_force suite for relaxed math mode (KhronosGroup#2221)
Fixes KhronosGroup#2145. As suggested by @svenvh, reciprocal has different precision requirements than divide. This PR introduces a special path for reciprocal in the binary float operator test (binary_operator_float) so that reciprocal can be tested with relaxed math. If this PR is approved, PR KhronosGroup#2162 should be invalidated.
1 parent cc9e616 commit bcfa1f7

6 files changed

Lines changed: 146 additions & 51 deletions

File tree

test_conformance/math_brute_force/binary_operator_double.cpp

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
214214
cl_double *s;
215215
cl_double *s2;
216216

217+
bool reciprocal = strcmp(name, "reciprocal") == 0;
218+
const double reciprocalArrayX[] = { 1.0 };
219+
const double *specialValuesX =
220+
reciprocal ? reciprocalArrayX : specialValues;
221+
size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
222+
217223
Force64BitFPUPrecision();
218224

219225
cl_event e[VECTOR_SIZE_COUNT];
@@ -242,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
242248
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
243249
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
244250
cl_uint idx = 0;
245-
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
251+
int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
246252
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
247253

248254
// Test edge cases
@@ -252,14 +258,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
252258
cl_double *fp2 = (cl_double *)p2;
253259
uint32_t x, y;
254260

255-
x = (job_id * buffer_elements) % specialValuesCount;
261+
x = (job_id * buffer_elements) % specialValuesCountX;
256262
y = (job_id * buffer_elements) / specialValuesCount;
257263

258264
for (; idx < buffer_elements; idx++)
259265
{
260-
fp[idx] = specialValues[x];
266+
fp[idx] = specialValuesX[x];
261267
fp2[idx] = specialValues[y];
262-
if (++x >= specialValuesCount)
268+
++x;
269+
if (x >= specialValuesCountX)
263270
{
264271
x = 0;
265272
y++;
@@ -271,7 +278,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
271278
// Init any remaining values
272279
for (; idx < buffer_elements; idx++)
273280
{
274-
p[idx] = genrand_int64(d);
281+
p[idx] =
282+
reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d);
275283
p2[idx] = genrand_int64(d);
276284
}
277285

@@ -375,8 +383,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
375383
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
376384
s = (cl_double *)gIn + thread_id * buffer_elements;
377385
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
378-
for (size_t j = 0; j < buffer_elements; j++)
379-
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
386+
387+
if (reciprocal)
388+
for (size_t j = 0; j < buffer_elements; j++)
389+
r[j] = (float)func.f_f(s2[j]);
390+
else
391+
for (size_t j = 0; j < buffer_elements; j++)
392+
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
380393

381394
// Read the data back -- no need to wait for the first N-1 buffers but wait
382395
// for the last buffer. This is an in order queue.
@@ -406,7 +419,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
406419
if (t[j] != q[j])
407420
{
408421
cl_double test = ((cl_double *)q)[j];
409-
long double correct = func.f_ff(s[j], s2[j]);
422+
long double correct =
423+
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
424+
410425
float err = Bruteforce_Ulp_Error_Double(test, correct);
411426
int fail = !(fabsf(err) <= ulps);
412427

@@ -479,8 +494,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
479494
}
480495
else if (IsDoubleSubnormal(s2[j]))
481496
{
482-
long double correct2 = func.f_ff(s[j], 0.0);
483-
long double correct3 = func.f_ff(s[j], -0.0);
497+
long double correct2 =
498+
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
499+
long double correct3 =
500+
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
501+
484502
float err2 =
485503
Bruteforce_Ulp_Error_Double(test, correct2);
486504
float err3 =

test_conformance/math_brute_force/binary_operator_float.cpp

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
208208
cl_float *s2 = 0;
209209
RoundingMode oldRoundMode;
210210

211+
bool reciprocal = strcmp(name, "reciprocal") == 0;
212+
const float reciprocalArrayX[] = { 1.f };
213+
const float *specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
214+
size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
215+
211216
if (relaxedMode)
212217
{
213218
func = job->f->rfunc;
@@ -239,23 +244,23 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
239244
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
240245
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
241246
cl_uint idx = 0;
242-
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
247+
int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
243248
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
244249

245250
if (job_id <= (cl_uint)lastSpecialJobIndex)
246251
{
247252
// Insert special values
248253
uint32_t x, y;
249254

250-
x = (job_id * buffer_elements) % specialValuesCount;
255+
x = (job_id * buffer_elements) % specialValuesCountX;
251256
y = (job_id * buffer_elements) / specialValuesCount;
252257

253258
for (; idx < buffer_elements; idx++)
254259
{
255-
p[idx] = ((cl_uint *)specialValues)[x];
260+
p[idx] = ((cl_uint *)specialValuesX)[x];
256261
p2[idx] = ((cl_uint *)specialValues)[y];
257262
++x;
258-
if (x >= specialValuesCount)
263+
if (x >= specialValuesCountX)
259264
{
260265
x = 0;
261266
y++;
@@ -269,13 +274,19 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
269274
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
270275
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
271276
}
277+
else if (relaxedMode && reciprocal)
278+
{
279+
cl_uint p2j = p2[idx] & 0x7fffffff;
280+
// Replace values outside [2^-126, 2^126] with QNaN
281+
if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
282+
}
272283
}
273284
}
274285

275286
// Init any remaining values
276287
for (; idx < buffer_elements; idx++)
277288
{
278-
p[idx] = genrand_int32(d);
289+
p[idx] = reciprocal ? ((cl_uint *)specialValuesX)[0] : genrand_int32(d);
279290
p2[idx] = genrand_int32(d);
280291

281292
if (relaxedMode && strcmp(name, "divide") == 0)
@@ -286,6 +297,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
286297
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
287298
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
288299
}
300+
else if (relaxedMode && reciprocal)
301+
{
302+
cl_uint p2j = p2[idx] & 0x7fffffff;
303+
// Replace values outside [2^-126, 2^126] with QNaN
304+
if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
305+
}
289306
}
290307

291308
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -402,18 +419,31 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
402419
s2 = (float *)gIn2 + thread_id * buffer_elements;
403420
if (gInfNanSupport)
404421
{
405-
for (size_t j = 0; j < buffer_elements; j++)
406-
r[j] = (float)func.f_ff(s[j], s2[j]);
422+
if (reciprocal)
423+
for (size_t j = 0; j < buffer_elements; j++)
424+
r[j] = (float)func.f_f(s2[j]);
425+
else
426+
for (size_t j = 0; j < buffer_elements; j++)
427+
r[j] = (float)func.f_ff(s[j], s2[j]);
407428
}
408429
else
409430
{
410-
for (size_t j = 0; j < buffer_elements; j++)
411-
{
412-
feclearexcept(FE_OVERFLOW);
413-
r[j] = (float)func.f_ff(s[j], s2[j]);
414-
overflow[j] =
415-
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
416-
}
431+
if (reciprocal)
432+
for (size_t j = 0; j < buffer_elements; j++)
433+
{
434+
feclearexcept(FE_OVERFLOW);
435+
r[j] = (float)func.f_f(s2[j]);
436+
overflow[j] =
437+
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
438+
}
439+
else
440+
for (size_t j = 0; j < buffer_elements; j++)
441+
{
442+
feclearexcept(FE_OVERFLOW);
443+
r[j] = (float)func.f_ff(s[j], s2[j]);
444+
overflow[j] =
445+
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
446+
}
417447
}
418448

419449
if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
@@ -448,7 +478,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
448478
if (t[j] != q[j])
449479
{
450480
float test = ((float *)q)[j];
451-
double correct = func.f_ff(s[j], s2[j]);
481+
double correct =
482+
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
452483

453484
// Per section 10 paragraph 6, accept any result if an input or
454485
// output is a infinity or NaN or overflow
@@ -485,7 +516,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
485516
}
486517

487518
// retry per section 6.5.3.3
488-
if (IsFloatSubnormal(s[j]))
519+
if (!reciprocal && IsFloatSubnormal(s[j]))
489520
{
490521
double correct2, correct3;
491522
float err2, err3;
@@ -591,8 +622,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
591622

592623
if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
593624

594-
correct2 = func.f_ff(s[j], 0.0);
595-
correct3 = func.f_ff(s[j], -0.0);
625+
correct2 =
626+
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
627+
correct3 =
628+
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
596629

597630
// Per section 10 paragraph 6, accept any result if an
598631
// input or output is a infinity or NaN or overflow
@@ -625,7 +658,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
625658
}
626659
}
627660

628-
629661
if (fabsf(err) > tinfo->maxError)
630662
{
631663
tinfo->maxError = fabsf(err);

test_conformance/math_brute_force/binary_operator_half.cpp

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
120120
std::vector<float> s(0), s2(0);
121121
RoundingMode oldRoundMode;
122122

123+
bool reciprocal = strcmp(name, "reciprocal") == 0;
124+
const cl_half reciprocalArrayHalfX[] = { 0x3c00 };
125+
const cl_half *specialValuesHalfX =
126+
reciprocal ? reciprocalArrayHalfX : specialValuesHalf;
127+
size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount;
128+
123129
cl_event e[VECTOR_SIZE_COUNT];
124130
cl_half *out[VECTOR_SIZE_COUNT];
125131

@@ -148,22 +154,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
148154
cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
149155
cl_uint idx = 0;
150156
int totalSpecialValueCount =
151-
specialValuesHalfCount * specialValuesHalfCount;
157+
specialValuesHalfCountX * specialValuesHalfCount;
152158
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
153159

154160
if (job_id <= (cl_uint)lastSpecialJobIndex)
155161
{
156162
// Insert special values
157163
uint32_t x, y;
158164

159-
x = (job_id * buffer_elements) % specialValuesHalfCount;
165+
x = (job_id * buffer_elements) % specialValuesHalfCountX;
160166
y = (job_id * buffer_elements) / specialValuesHalfCount;
161167

162168
for (; idx < buffer_elements; idx++)
163169
{
164-
p[idx] = specialValuesHalf[x];
170+
p[idx] = specialValuesHalfX[x];
165171
p2[idx] = specialValuesHalf[y];
166-
if (++x >= specialValuesHalfCount)
172+
++x;
173+
if (x >= specialValuesHalfCountX)
167174
{
168175
x = 0;
169176
y++;
@@ -175,7 +182,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
175182
// Init any remaining values
176183
for (; idx < buffer_elements; idx++)
177184
{
178-
p[idx] = (cl_half)genrand_int32(d);
185+
p[idx] = reciprocal ? ((cl_half *)specialValuesHalfX)[0]
186+
: (cl_half)genrand_int32(d);
179187
p2[idx] = (cl_half)genrand_int32(d);
180188
}
181189
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -283,11 +291,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
283291
s.resize(buffer_elements);
284292
s2.resize(buffer_elements);
285293

286-
for (size_t j = 0; j < buffer_elements; j++)
294+
if (reciprocal)
295+
{
296+
for (size_t j = 0; j < buffer_elements; j++)
297+
{
298+
s[j] = HTF(p[j]);
299+
s2[j] = HTF(p2[j]);
300+
r[j] = HFF(func.f_f(s2[j]));
301+
}
302+
}
303+
else
287304
{
288-
s[j] = HTF(p[j]);
289-
s2[j] = HTF(p2[j]);
290-
r[j] = HFF(func.f_ff(s[j], s2[j]));
305+
for (size_t j = 0; j < buffer_elements; j++)
306+
{
307+
s[j] = HTF(p[j]);
308+
s2[j] = HTF(p2[j]);
309+
r[j] = HFF(func.f_ff(s[j], s2[j]));
310+
}
291311
}
292312

293313
if (ftz) RestoreFPState(&oldMode);
@@ -320,7 +340,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
320340
if (r[j] != q[j])
321341
{
322342
float test = HTF(q[j]);
323-
float correct = func.f_ff(s[j], s2[j]);
343+
float correct =
344+
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
324345

325346
// Per section 10 paragraph 6, accept any result if an input or
326347
// output is a infinity or NaN or overflow
@@ -446,9 +467,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
446467
double correct2, correct3;
447468
float err2, err3;
448469

449-
correct2 = func.f_ff(s[j], 0.0);
450-
correct3 = func.f_ff(s[j], -0.0);
451-
470+
correct2 =
471+
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
472+
correct3 =
473+
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
452474

453475
// Per section 10 paragraph 6, accept any result if an
454476
// input or output is a infinity or NaN or overflow

test_conformance/math_brute_force/function_list.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@
7878
#define reference_copysign NULL
7979
#define reference_sqrt NULL
8080
#define reference_sqrtl NULL
81+
#define reference_reciprocal NULL
82+
#define reference_reciprocall NULL
83+
#define reference_relaxed_reciprocal NULL
84+
8185
#define reference_divide NULL
8286
#define reference_dividel NULL
8387
#define reference_relaxed_divide NULL
@@ -346,7 +350,6 @@ const Func functionList[] = {
346350

347351
ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
348352
ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
349-
//ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
350353
ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
351354
ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
352355
ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
@@ -418,6 +421,21 @@ const Func functionList[] = {
418421
// basic operations
419422
OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
420423
OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
424+
//ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
425+
{ "reciprocal",
426+
"/",
427+
{ (void*)reference_reciprocal },
428+
{ (void*)reference_reciprocall },
429+
{ (void*)reference_relaxed_reciprocal },
430+
2.5f,
431+
0.0f,
432+
0.0f,
433+
3.0f,
434+
2.5f,
435+
INFINITY,
436+
FTZ_OFF,
437+
RELAXED_ON,
438+
binaryOperatorF },
421439
{ "divide",
422440
"/",
423441
{ (void*)reference_divide },

0 commit comments

Comments
 (0)