From 2b4e9a65d1782a12095c0d9c0957aef7e62c204b Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 16 Apr 2026 18:47:10 +0200 Subject: [PATCH 1/7] Added edge cases for math brute force testing --- .../math_brute_force/CMakeLists.txt | 1 + .../math_brute_force/edge_cases.cpp | 708 ++++++++++++++++++ test_conformance/math_brute_force/main.cpp | 22 +- test_conformance/math_brute_force/utility.h | 2 + 4 files changed, 722 insertions(+), 11 deletions(-) create mode 100644 test_conformance/math_brute_force/edge_cases.cpp diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 35d4e0b3ab..c756b915e6 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -15,6 +15,7 @@ set(${MODULE_NAME}_SOURCES binary_two_results_i_half.cpp common.cpp common.h + edge_cases.cpp function_list.cpp function_list.h i_unary_double.cpp diff --git a/test_conformance/math_brute_force/edge_cases.cpp b/test_conformance/math_brute_force/edge_cases.cpp new file mode 100644 index 0000000000..800a6d5027 --- /dev/null +++ b/test_conformance/math_brute_force/edge_cases.cpp @@ -0,0 +1,708 @@ +// +// Copyright (c) 2026 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "CL/cl.h" +#include "harness/errorHelpers.h" +#include "harness/kernelHelpers.h" +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" + +#include "utility.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace { + +constexpr std::size_t CL_VALUE_MAX_BYTES = 64; + +struct AnyValue +{ + std::string cl_type; + std::size_t byte_size = 0; + std::array data{}; + + template static AnyValue make(const T &val) + { + static_assert(sizeof(T) <= CL_VALUE_MAX_BYTES, + "CLValue: type exceeds CL_VALUE_MAX_BYTES"); + AnyValue v; + v.cl_type = "half"; + if constexpr (std::is_same_v) + v.cl_type = "float"; + else if constexpr (std::is_same_v) + v.cl_type = "double"; + else if constexpr (std::is_same_v) + v.cl_type = "int"; + + v.byte_size = sizeof(T); + std::memcpy(v.data.data(), &val, sizeof(T)); + return v; + } + + template bool all_elements_nan() const + { + if constexpr (std::is_same_v) + { + for (std::size_t off = 0; off + 2 <= byte_size; off += 2) + { + uint16_t bits; + std::memcpy(&bits, data.data() + off, 2); + if ((bits & 0x7C00u) != 0x7C00u || (bits & 0x03FFu) == 0u) + return false; + } + return true; + } + if constexpr (std::is_same_v) + { + for (std::size_t off = 0; off + 8 <= byte_size; off += 8) + { + uint64_t bits; + std::memcpy(&bits, data.data() + off, 8); + if ((bits & 0x7FF0000000000000ull) != 0x7FF0000000000000ull + || (bits & 0x000FFFFFFFFFFFFFull) == 0ull) + return false; + } + return true; + } + + for (std::size_t off = 0; off + 4 <= byte_size; off += 4) + { + uint32_t bits; + std::memcpy(&bits, data.data() + off, 4); + if ((bits & 0x7F800000u) != 0x7F800000u + || (bits & 0x007FFFFFu) == 0u) + return false; + } + return true; + } +}; + +struct EdgeCaseSpec +{ + const char *func_name; + std::vector inputs; + AnyValue expected; + bool expect_nan = false; +}; + +struct AbstractValue +{ + enum class Kind + { + PosZero, + NegZero, + PosInf, + NegInf, + NaN, + Finite, + Int, + SmallestPosDenorm, + SmallestNegDenorm, + } kind; + + double d = 0.0; + int i = 0; +}; + +inline AbstractValue AV_POS_ZERO() { return { AbstractValue::Kind::PosZero }; } +inline AbstractValue AV_NEG_ZERO() { return { AbstractValue::Kind::NegZero }; } +inline AbstractValue AV_POS_INF() { return { AbstractValue::Kind::PosInf }; } +inline AbstractValue AV_NEG_INF() { return { AbstractValue::Kind::NegInf }; } +inline AbstractValue AV_NAN() { return { AbstractValue::Kind::NaN }; } +inline AbstractValue AV_F(double v) +{ + return { AbstractValue::Kind::Finite, v }; +} +inline AbstractValue AV_INT(int v) +{ + return { AbstractValue::Kind::Int, 0.0, v }; +} +inline AbstractValue AV_SMALLEST_POS_DENORM() +{ + return { AbstractValue::Kind::SmallestPosDenorm }; +} +inline AbstractValue AV_SMALLEST_NEG_DENORM() +{ + return { AbstractValue::Kind::SmallestNegDenorm }; +} + +const AbstractValue POS_ZERO = AV_POS_ZERO(); +const AbstractValue NEG_ZERO = AV_NEG_ZERO(); +const AbstractValue POS_INF = AV_POS_INF(); +const AbstractValue NEG_INF = AV_NEG_INF(); +const AbstractValue NAN_V = AV_NAN(); +const AbstractValue ONE = AV_F(1.0); +const AbstractValue NEG_ONE = AV_F(-1.0); +const AbstractValue TWO = AV_F(2.0); +const AbstractValue NEG_TWO = AV_F(-2.0); + +struct AbstractEdgeCase +{ + const char *func_name; + std::vector inputs; + AbstractValue expected; + + bool expect_nan = false; // check isnan() instead of memcmp + + bool requires_inf_nan = false; // CL_FP_INF_NAN + bool requires_denorm = false; // CL_FP_DENORM + bool requires_rte = false; // CL_FP_ROUND_TO_NEAREST +}; + +static const AbstractEdgeCase edge_case_table[] = { + + { "acospi", { ONE }, POS_ZERO }, + { "acospi", { AV_F(2) }, NAN_V, true, true }, + { "acospi", { AV_F(-2) }, NAN_V, true, true }, + { "asinpi", { POS_ZERO }, POS_ZERO }, + { "asinpi", { NEG_ZERO }, NEG_ZERO }, + { "asinpi", { AV_F(2) }, NAN_V, true, true }, + { "asinpi", { AV_F(-2) }, NAN_V, true, true }, + { "atanpi", { POS_ZERO }, POS_ZERO }, + { "atanpi", { NEG_ZERO }, NEG_ZERO }, + { "atanpi", { POS_INF }, AV_F(0.5), false, true }, + { "atanpi", { NEG_INF }, AV_F(-0.5), false, true }, + { "atan2pi", { POS_ZERO, NEG_ZERO }, AV_F(1.0) }, + { "atan2pi", { NEG_ZERO, NEG_ZERO }, AV_F(-1.0) }, + { "atan2pi", { POS_ZERO, POS_ZERO }, POS_ZERO }, + { "atan2pi", { NEG_ZERO, POS_ZERO }, NEG_ZERO }, + { "atan2pi", { POS_ZERO, NEG_ONE }, AV_F(1.0) }, + { "atan2pi", { NEG_ZERO, NEG_ONE }, AV_F(-1.0) }, + { "atan2pi", { POS_ZERO, ONE }, POS_ZERO }, + { "atan2pi", { NEG_ZERO, ONE }, NEG_ZERO }, + { "atan2pi", { NEG_ONE, POS_ZERO }, AV_F(-0.5) }, + { "atan2pi", { NEG_ONE, NEG_ZERO }, AV_F(-0.5) }, + { "atan2pi", { ONE, POS_ZERO }, AV_F(0.5) }, + { "atan2pi", { ONE, NEG_ZERO }, AV_F(0.5) }, + { "atan2pi", { ONE, NEG_INF }, AV_F(1.0), false, true }, + { "atan2pi", { NEG_ONE, NEG_INF }, AV_F(-1.0), false, true }, + { "atan2pi", { ONE, POS_INF }, POS_ZERO, false, true }, + { "atan2pi", { NEG_ONE, POS_INF }, NEG_ZERO, false, true }, + { "atan2pi", { POS_INF, ONE }, AV_F(0.5), false, true }, + { "atan2pi", { NEG_INF, ONE }, AV_F(-0.5), false, true }, + { "atan2pi", { POS_INF, NEG_INF }, AV_F(0.75), false, true }, + { "atan2pi", { NEG_INF, NEG_INF }, AV_F(-0.75), false, true }, + { "atan2pi", { POS_INF, POS_INF }, AV_F(0.25), false, true }, + { "atan2pi", { NEG_INF, POS_INF }, AV_F(-0.25), false, true }, + { "ceil", { AV_F(-0.5) }, NEG_ZERO, false, false, false, true }, + { "ceil", { AV_F(-0.25) }, NEG_ZERO, false, false, false, true }, + { "cospi", { POS_ZERO }, ONE }, + { "cospi", { NEG_ZERO }, ONE }, + { "cospi", { AV_F(0.5) }, POS_ZERO }, + { "cospi", { AV_F(1.5) }, POS_ZERO }, + { "cospi", { AV_F(2.5) }, POS_ZERO }, + { "cospi", { AV_F(-0.5) }, POS_ZERO }, + { "cospi", { AV_F(-1.5) }, POS_ZERO }, + { "cospi", { POS_INF }, NAN_V, true, true }, + { "cospi", { NEG_INF }, NAN_V, true, true }, + { "exp10", { NEG_INF }, POS_ZERO, false, true }, + { "exp10", { POS_INF }, POS_INF, false, true }, + { "fdim", { ONE, NAN_V }, NAN_V, true, true }, + { "fdim", { NAN_V, ONE }, NAN_V, true, true }, + { "fdim", { POS_INF, NAN_V }, NAN_V, true, true }, + { "fdim", { NAN_V, POS_INF }, NAN_V, true, true }, + { "fmod", { POS_ZERO, NAN_V }, NAN_V, true, true }, + { "fmod", { NEG_ZERO, NAN_V }, NAN_V, true, true }, + { "nextafter", + { NEG_ZERO, ONE }, + AV_SMALLEST_POS_DENORM(), + false, + false, + true }, + { "nextafter", + { POS_ZERO, NEG_ONE }, + AV_SMALLEST_NEG_DENORM(), + false, + false, + true }, + { "pow", { POS_ZERO, NEG_INF }, POS_INF, false, true }, + { "pow", { NEG_ZERO, NEG_INF }, POS_INF, false, true }, + { "pown", { POS_ZERO, AV_INT(0) }, ONE }, + { "pown", { NEG_ZERO, AV_INT(0) }, ONE }, + { "pown", { POS_INF, AV_INT(0) }, ONE, false, true }, + { "pown", { NEG_INF, AV_INT(0) }, ONE, false, true }, + { "pown", { NAN_V, AV_INT(0) }, ONE, false, true }, + { "pown", { POS_ZERO, AV_INT(-1) }, POS_INF, false, true }, + { "pown", { NEG_ZERO, AV_INT(-1) }, NEG_INF, false, true }, + { "pown", { POS_ZERO, AV_INT(-3) }, POS_INF, false, true }, + { "pown", { NEG_ZERO, AV_INT(-3) }, NEG_INF, false, true }, + { "pown", { POS_ZERO, AV_INT(-2) }, POS_INF, false, true }, + { "pown", { NEG_ZERO, AV_INT(-2) }, POS_INF, false, true }, + { "pown", { POS_ZERO, AV_INT(2) }, POS_ZERO }, + { "pown", { NEG_ZERO, AV_INT(2) }, POS_ZERO }, + { "pown", { POS_ZERO, AV_INT(1) }, POS_ZERO }, + { "pown", { NEG_ZERO, AV_INT(1) }, NEG_ZERO }, + { "pown", { POS_ZERO, AV_INT(3) }, POS_ZERO }, + { "pown", { NEG_ZERO, AV_INT(3) }, NEG_ZERO }, + { "powr", { ONE, POS_ZERO }, ONE }, + { "powr", { TWO, NEG_ZERO }, ONE }, + { "powr", { POS_ZERO, NEG_ONE }, POS_INF, false, true }, + { "powr", { NEG_ZERO, NEG_ONE }, POS_INF, false, true }, + { "powr", { POS_ZERO, NEG_INF }, POS_INF, false, true }, + { "powr", { NEG_ZERO, NEG_INF }, POS_INF, false, true }, + { "powr", { POS_ZERO, ONE }, POS_ZERO }, + { "powr", { NEG_ZERO, ONE }, POS_ZERO }, + { "powr", { ONE, TWO }, ONE }, + { "powr", { ONE, NEG_ONE }, ONE }, + { "powr", { NEG_ONE, TWO }, NAN_V, true, true }, + { "powr", { POS_ZERO, POS_ZERO }, NAN_V, true, true }, + { "powr", { NEG_ZERO, NEG_ZERO }, NAN_V, true, true }, + { "powr", { POS_INF, POS_ZERO }, NAN_V, true, true }, + { "powr", { POS_INF, NEG_ZERO }, NAN_V, true, true }, + { "powr", { ONE, POS_INF }, NAN_V, true, true }, + { "powr", { ONE, NEG_INF }, NAN_V, true, true }, + { "rint", { AV_F(-0.5) }, NEG_ZERO, false, false, false, true }, + { "rootn", { POS_ZERO, AV_INT(-1) }, POS_INF, false, true }, + { "rootn", { NEG_ZERO, AV_INT(-1) }, NEG_INF, false, true }, + { "rootn", { POS_ZERO, AV_INT(-3) }, POS_INF, false, true }, + { "rootn", { NEG_ZERO, AV_INT(-3) }, NEG_INF, false, true }, + { "rootn", { POS_ZERO, AV_INT(-2) }, POS_INF, false, true }, + { "rootn", { NEG_ZERO, AV_INT(-2) }, POS_INF, false, true }, + { "rootn", { POS_ZERO, AV_INT(2) }, POS_ZERO }, + { "rootn", { NEG_ZERO, AV_INT(2) }, POS_ZERO }, + { "rootn", { POS_ZERO, AV_INT(1) }, POS_ZERO }, + { "rootn", { NEG_ZERO, AV_INT(1) }, NEG_ZERO }, + { "rootn", { POS_ZERO, AV_INT(3) }, POS_ZERO }, + { "rootn", { NEG_ZERO, AV_INT(3) }, NEG_ZERO }, + { "rootn", { NEG_ONE, AV_INT(2) }, NAN_V, true, true }, + { "rootn", { NEG_ONE, AV_INT(4) }, NAN_V, true, true }, + { "rootn", { ONE, AV_INT(0) }, NAN_V, true, true }, + { "rootn", { POS_ZERO, AV_INT(0) }, NAN_V, true, true }, + { "round", { AV_F(-0.25) }, NEG_ZERO, false, false, false, true }, + { "sinpi", { POS_ZERO }, POS_ZERO }, + { "sinpi", { NEG_ZERO }, NEG_ZERO }, + { "sinpi", { ONE }, POS_ZERO }, + { "sinpi", { TWO }, POS_ZERO }, + { "sinpi", { AV_F(4.0) }, POS_ZERO }, + { "sinpi", { NEG_ONE }, NEG_ZERO }, + { "sinpi", { NEG_TWO }, NEG_ZERO }, + { "sinpi", { AV_F(-4.0) }, NEG_ZERO }, + { "sinpi", { POS_INF }, NAN_V, true, true }, + { "sinpi", { NEG_INF }, NAN_V, true, true }, + { "tanpi", { POS_ZERO }, POS_ZERO }, + { "tanpi", { NEG_ZERO }, NEG_ZERO }, + { "tanpi", { POS_INF }, NAN_V, true, true }, + { "tanpi", { NEG_INF }, NAN_V, true, true }, + { "tanpi", { AV_F(0.0) }, POS_ZERO }, + { "tanpi", { AV_F(2.0) }, POS_ZERO }, + { "tanpi", { AV_F(-2.0) }, NEG_ZERO }, + { "tanpi", { ONE }, NEG_ZERO }, + { "tanpi", { NEG_ONE }, POS_ZERO }, + { "tanpi", { AV_F(3.0) }, NEG_ZERO }, + { "tanpi", { AV_F(-3.0) }, POS_ZERO }, + { "tanpi", { AV_F(0.5) }, POS_INF, false, true }, + { "tanpi", { AV_F(2.5) }, POS_INF, false, true }, + { "tanpi", { AV_F(1.5) }, NEG_INF, false, true }, + { "tanpi", { AV_F(-0.5) }, NEG_INF, false, true }, + { "trunc", { AV_F(-0.5) }, NEG_ZERO, false, false, false, true }, + { "trunc", { AV_F(-0.25) }, NEG_ZERO, false, false, false, true }, +}; + +inline std::string build_kernel_source(const EdgeCaseSpec &ec) +{ + std::string s; + s += "__kernel void test_edge_case(\n"; + s += " __global "; + s += ec.expected.cl_type; + s += " *out"; + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + s += ",\n __global const "; + s += ec.inputs[i].cl_type; + s += " *in"; + s += std::to_string(i); + } + s += ")\n{\n *out = "; + s += ec.func_name; + s += "("; + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) s += ", "; + s += "*in"; + s += std::to_string(i); + } + s += ");\n}\n"; + return s; +} + +void log_anyvalue(const AnyValue &v) +{ + const std::size_t elem = [&] { + if (v.cl_type == "double") return std::size_t(8); + if (v.cl_type == "float") return std::size_t(4); + if (v.cl_type == "int") return std::size_t(4); + return std::size_t(2); // half + }(); + + for (std::size_t off = 0; off < v.byte_size; off += elem) + { + switch (elem) + { + case 8: { + uint64_t bits; + std::memcpy(&bits, v.data.data() + off, 8); + log_error("0x%016" PRIx64, bits); + break; + } + case 4: { + uint32_t bits; + std::memcpy(&bits, v.data.data() + off, 4); + log_error("0x%08" PRIx32, bits); + break; + } + case 2: { + uint16_t bits; + std::memcpy(&bits, v.data.data() + off, 2); + log_error("0x%04" PRIx16, bits); + break; + } + } + } +} + +template +inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, + cl_command_queue queue) +{ + // Build kernel + std::string src = build_kernel_source(ec); + if constexpr (std::is_same_v) + src = std::string("#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n") + + src; + else if constexpr (std::is_same_v) + src = std::string("#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n") + + src; + const char *src_ptr = src.c_str(); + clProgramWrapper program; + clKernelWrapper kernel; + + if (create_single_kernel_helper(context, &program, &kernel, 1, &src_ptr, + "test_edge_case")) + { + log_error("ERROR: Failed to build kernel for '%s'\nSource:\n%s\n", + ec.func_name, src.c_str()); + return TEST_FAIL; + } + + cl_int err = CL_SUCCESS; + + clMemWrapper out_buf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + ec.expected.byte_size, nullptr, &err); + if (err != CL_SUCCESS) + { + log_error("ERROR: clCreateBuffer (out) failed for '%s': %d\n", + ec.func_name, err); + return TEST_FAIL; + } + + std::vector in_bufs; + in_bufs.reserve(ec.inputs.size()); + + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + cl_mem buf = clCreateBuffer(context, CL_MEM_READ_ONLY, + ec.inputs[i].byte_size, nullptr, &err); + if (err != CL_SUCCESS) + { + log_error("ERROR: clCreateBuffer (in%zu) failed for '%s': %d\n", i, + ec.func_name, err); + return TEST_FAIL; + } + in_bufs.push_back(buf); + + err = + clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, ec.inputs[i].byte_size, + ec.inputs[i].data.data(), 0, nullptr, nullptr); + if (err != CL_SUCCESS) + { + log_error("ERROR: clEnqueueWriteBuffer (in%zu) failed for" + " '%s': %d\n", + i, ec.func_name, err); + return TEST_FAIL; + } + } + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &out_buf); + for (cl_uint i = 0; i < static_cast(in_bufs.size()); ++i) + err |= clSetKernelArg(kernel, i + 1, sizeof(cl_mem), &in_bufs[i]); + if (err != CL_SUCCESS) + { + log_error("ERROR: clSetKernelArg failed for '%s': %d\n", ec.func_name, + err); + return TEST_FAIL; + } + + { + const std::size_t gws = 1; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &gws, nullptr, + 0, nullptr, nullptr); + if (err != CL_SUCCESS) + { + log_error("ERROR: clEnqueueNDRangeKernel failed for '%s': %d\n", + ec.func_name, err); + return TEST_FAIL; + } + } + + { + std::array result{}; + err = clEnqueueReadBuffer(queue, out_buf, CL_TRUE, 0, + ec.expected.byte_size, result.data(), 0, + nullptr, nullptr); + if (err != CL_SUCCESS) + { + log_error("ERROR: clEnqueueReadBuffer failed for '%s': %d\n", + ec.func_name, err); + return TEST_FAIL; + } + + if (ec.expect_nan) + { + AnyValue got; + got.byte_size = ec.expected.byte_size; + std::memcpy(got.data.data(), result.data(), got.byte_size); + if (!got.all_elements_nan()) + { + log_error("FAIL: %s(", ec.func_name); + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) log_error(", "); + log_error("0x"); + log_anyvalue(ec.inputs[i]); + } + log_error(") — expected NaN, got 0x"); + for (std::size_t i = 0; i < ec.expected.byte_size; ++i) + log_error("%02x", result[i]); + log_error("\n"); + err = -1; + } + } + else + { + if (std::memcmp(result.data(), ec.expected.data.data(), + ec.expected.byte_size) + != 0) + { + log_error("FAIL: %s(", ec.func_name); + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) log_error(", "); + log_error("0x"); + log_anyvalue(ec.inputs[i]); + } + log_error(") - expected 0x"); + for (std::size_t i = 0; i < ec.expected.byte_size; ++i) + log_error("%02x", ec.expected.data[i]); + log_error(", got 0x"); + for (std::size_t i = 0; i < ec.expected.byte_size; ++i) + log_error("%02x", result[i]); + log_error("\n"); + err = -1; + } + } + } + + return err == -1 ? TEST_FAIL : TEST_PASS; +} + +template AnyValue abstract_to_anyvalue(const AbstractValue &av) +{ + // Int jest niezależny od precyzji — zawsze cl_int + if (av.kind == AbstractValue::Kind::Int) + return AnyValue::make(av.i); + + if constexpr (std::is_same_v) + { + uint16_t bits = 0; + switch (av.kind) + { + case AbstractValue::Kind::PosZero: bits = 0x0000; break; + case AbstractValue::Kind::NegZero: bits = 0x8000; break; + case AbstractValue::Kind::PosInf: bits = 0x7C00; break; + case AbstractValue::Kind::NegInf: bits = 0xFC00; break; + case AbstractValue::Kind::NaN: bits = 0x7E00; break; + case AbstractValue::Kind::SmallestPosDenorm: bits = 0x0001; break; + case AbstractValue::Kind::SmallestNegDenorm: bits = 0x8001; break; + case AbstractValue::Kind::Finite: + bits = + cl_half_from_float(static_cast(av.d), CL_HALF_RTE); + break; + default: break; + } + return AnyValue::make(bits); + } + else + { + T val{}; + switch (av.kind) + { + case AbstractValue::Kind::PosZero: val = T(0); break; + case AbstractValue::Kind::NegZero: val = -T(0); break; + case AbstractValue::Kind::PosInf: + val = std::numeric_limits::infinity(); + break; + case AbstractValue::Kind::NegInf: + val = -std::numeric_limits::infinity(); + break; + case AbstractValue::Kind::NaN: + val = std::numeric_limits::quiet_NaN(); + break; + case AbstractValue::Kind::Finite: val = static_cast(av.d); break; + case AbstractValue::Kind::SmallestPosDenorm: + val = std::numeric_limits::denorm_min(); + break; + case AbstractValue::Kind::SmallestNegDenorm: + val = -std::numeric_limits::denorm_min(); + break; + default: break; + } + return AnyValue::make(val); + } +} + +template EdgeCaseSpec make_edge_case(const AbstractEdgeCase &aec) +{ + EdgeCaseSpec ec; + ec.expect_nan = aec.expect_nan; + ec.expected = abstract_to_anyvalue(aec.expected); + ec.inputs.reserve(aec.inputs.size()); + for (const auto &av : aec.inputs) + ec.inputs.push_back(abstract_to_anyvalue(av)); + ec.func_name = aec.func_name; + return ec; +} + +inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, + cl_context context, cl_command_queue queue) +{ + cl_int overall = CL_SUCCESS; + log_info("float\n"); + for (std::size_t i = 0; i < count; ++i) + { + auto aec = cases[i]; + if (gIsEmbedded) + { + if (aec.requires_denorm && !(gFloatCapabilities & CL_FP_DENORM)) + { + log_info("SKIP (no CL_FP_DENORM): %s\n", aec.func_name); + continue; + } + + if (aec.requires_inf_nan && !(gFloatCapabilities & CL_FP_INF_NAN)) + { + log_info("SKIP (no CL_FP_INF_NAN): %s\n", aec.func_name); + continue; + } + + if (aec.requires_rte + && !(gFloatCapabilities & CL_FP_ROUND_TO_NEAREST)) + { + log_info("SKIP (no CL_FP_ROUND_TO_NEAREST): %s\n", + aec.func_name); + continue; + } + } + + EdgeCaseSpec ec = make_edge_case(aec); + + if (run_edge_case(ec, context, queue) != CL_SUCCESS) + overall = -1; + } + + if (gHasHalf) + { + log_info("half\n"); + for (std::size_t i = 0; i < count; ++i) + { + auto aec = cases[i]; + if (aec.requires_denorm && !(gHalfCapabilities & CL_FP_DENORM)) + { + log_info("SKIP fp16 (no CL_FP_DENORM): %s\n", aec.func_name); + continue; + } + + if (aec.requires_inf_nan && !(gHalfCapabilities & CL_FP_INF_NAN)) + { + log_info("SKIP fp16 (no CL_FP_INF_NAN): %s\n", aec.func_name); + continue; + } + + if (aec.requires_rte + && !(gHalfCapabilities & CL_FP_ROUND_TO_NEAREST)) + { + log_info("SKIP fp16 (no CL_FP_ROUND_TO_NEAREST): %s\n", + aec.func_name); + continue; + } + + EdgeCaseSpec ec = make_edge_case(aec); + + if (run_edge_case(ec, context, queue) != CL_SUCCESS) + overall = -1; + } + } + + if (gHasDouble) + { + log_info("double\n"); + for (std::size_t i = 0; i < count; ++i) + { + auto aec = cases[i]; + + if (aec.requires_denorm && !(gDoubleCapabilities & CL_FP_DENORM)) + { + log_info("SKIP fp64 (no CL_FP_DENORM): %s\n", aec.func_name); + continue; + } + + if (aec.requires_inf_nan && !(gDoubleCapabilities & CL_FP_INF_NAN)) + { + log_info("SKIP fp64 (no CL_FP_INF_NAN): %s\n", aec.func_name); + continue; + } + + if (aec.requires_rte + && !(gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST)) + { + log_info("SKIP fp64 (no CL_FP_ROUND_TO_NEAREST): %s\n", + aec.func_name); + continue; + } + + EdgeCaseSpec ec = make_edge_case(aec); + + if (run_edge_case(ec, context, queue) != CL_SUCCESS) + overall = -1; + } + } + + return overall; +} + +} // anonymous namespace + +REGISTER_TEST(math_edge_cases) +{ + return run_edge_cases( + edge_case_table, sizeof(edge_case_table) / sizeof((edge_case_table)[0]), + gContext, gQueue); +} diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 2c161e23b9..4ae5a185da 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -67,7 +67,7 @@ static int gStopOnError = 0; static bool gSkipRestOfTests; int gForceFTZ = 0; int gHostFill = 0; -static int gHasDouble = 0; +int gHasDouble = 0; static int gTestFloat = 1; // This flag should be 'ON' by default and it can be changed through the command // line arguments. @@ -83,6 +83,7 @@ static int gTestFastRelaxed = 1; int gFastRelaxedDerived = 1; int gHasHalf = 0; cl_device_fp_config gHalfCapabilities = 0; +cl_device_fp_config gDoubleCapabilities = 0; int gDeviceILogb0 = 1; int gDeviceILogbNaN = 1; int gCheckTininessBeforeRounding = 1; @@ -687,10 +688,9 @@ test_status InitCL(cl_device_id device) { gHasDouble ^= 1; #if defined(CL_DEVICE_DOUBLE_FP_CONFIG) - cl_device_fp_config doubleCapabilities = 0; if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, - sizeof(doubleCapabilities), - &doubleCapabilities, NULL))) + sizeof(gDoubleCapabilities), + &gDoubleCapabilities, NULL))) { vlog_error("ERROR: Unable to get device " "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n", @@ -699,19 +699,19 @@ test_status InitCL(cl_device_id device) } if (DOUBLE_REQUIRED_FEATURES - != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES)) + != (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES)) { std::string list; - if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, "; - if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST)) + if (0 == (gDoubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, "; + if (0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST)) list += "CL_FP_ROUND_TO_NEAREST, "; - if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO)) + if (0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO)) list += "CL_FP_ROUND_TO_ZERO, "; - if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF)) + if (0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF)) list += "CL_FP_ROUND_TO_INF, "; - if (0 == (doubleCapabilities & CL_FP_INF_NAN)) + if (0 == (gDoubleCapabilities & CL_FP_INF_NAN)) list += "CL_FP_INF_NAN, "; - if (0 == (doubleCapabilities & CL_FP_DENORM)) + if (0 == (gDoubleCapabilities & CL_FP_DENORM)) list += "CL_FP_DENORM, "; vlog_error("ERROR: required double features are missing: %s\n", list.c_str()); diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index f735f9d307..f147a74dcd 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -64,6 +64,7 @@ extern int gFastRelaxedDerived; extern int gHostFill; extern int gIsInRTZMode; extern int gHasHalf; +extern int gHasDouble; extern int gInfNanSupport; extern int gIsEmbedded; extern int gVerboseBruteForce; @@ -71,6 +72,7 @@ extern uint32_t gMaxVectorSizeIndex; extern uint32_t gMinVectorSizeIndex; extern cl_device_fp_config gFloatCapabilities; extern cl_device_fp_config gHalfCapabilities; +extern cl_device_fp_config gDoubleCapabilities; extern RoundingMode gFloatToHalfRoundingMode; extern cl_half_rounding_mode gHalfRoundingMode; From 172c2150be9ecfbf1e7fdce14b6af31b7d7a153f Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 21 Apr 2026 09:51:16 +0200 Subject: [PATCH 2/7] corrections due to code review --- .../math_brute_force/edge_cases.cpp | 76 +++++++++++-------- test_conformance/math_brute_force/main.cpp | 2 +- test_conformance/math_brute_force/utility.h | 1 + 3 files changed, 48 insertions(+), 31 deletions(-) diff --git a/test_conformance/math_brute_force/edge_cases.cpp b/test_conformance/math_brute_force/edge_cases.cpp index 800a6d5027..655aea2d84 100644 --- a/test_conformance/math_brute_force/edge_cases.cpp +++ b/test_conformance/math_brute_force/edge_cases.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include namespace { @@ -161,13 +162,15 @@ struct AbstractEdgeCase std::vector inputs; AbstractValue expected; - bool expect_nan = false; // check isnan() instead of memcmp + bool expect_nan = false; bool requires_inf_nan = false; // CL_FP_INF_NAN bool requires_denorm = false; // CL_FP_DENORM bool requires_rte = false; // CL_FP_ROUND_TO_NEAREST }; +// Taken from OpenCL C Section 7.5.1. Additional Requirements Beyond C99 TC2 + static const AbstractEdgeCase edge_case_table[] = { { "acospi", { ONE }, POS_ZERO }, @@ -486,10 +489,9 @@ inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, for (std::size_t i = 0; i < ec.inputs.size(); ++i) { if (i) log_error(", "); - log_error("0x"); log_anyvalue(ec.inputs[i]); } - log_error(") — expected NaN, got 0x"); + log_error(") - expected NaN, got 0x"); for (std::size_t i = 0; i < ec.expected.byte_size; ++i) log_error("%02x", result[i]); log_error("\n"); @@ -506,7 +508,6 @@ inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, for (std::size_t i = 0; i < ec.inputs.size(); ++i) { if (i) log_error(", "); - log_error("0x"); log_anyvalue(ec.inputs[i]); } log_error(") - expected 0x"); @@ -526,7 +527,6 @@ inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, template AnyValue abstract_to_anyvalue(const AbstractValue &av) { - // Int jest niezależny od precyzji — zawsze cl_int if (av.kind == AbstractValue::Kind::Int) return AnyValue::make(av.i); @@ -595,45 +595,52 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, cl_context context, cl_command_queue queue) { cl_int overall = CL_SUCCESS; - log_info("float\n"); - for (std::size_t i = 0; i < count; ++i) + if (gTestFloat) { - auto aec = cases[i]; - if (gIsEmbedded) + log_info("float test\n"); + for (std::size_t i = 0; i < count; ++i) { - if (aec.requires_denorm && !(gFloatCapabilities & CL_FP_DENORM)) + auto &aec = cases[i]; + if (gIsEmbedded) { - log_info("SKIP (no CL_FP_DENORM): %s\n", aec.func_name); - continue; - } + if (aec.requires_denorm && !(gFloatCapabilities & CL_FP_DENORM)) + { + log_info("SKIP (no CL_FP_DENORM): %s\n", aec.func_name); + continue; + } - if (aec.requires_inf_nan && !(gFloatCapabilities & CL_FP_INF_NAN)) - { - log_info("SKIP (no CL_FP_INF_NAN): %s\n", aec.func_name); - continue; - } + if (aec.requires_inf_nan + && !(gFloatCapabilities & CL_FP_INF_NAN)) + { + log_info("SKIP (no CL_FP_INF_NAN): %s\n", aec.func_name); + continue; + } - if (aec.requires_rte - && !(gFloatCapabilities & CL_FP_ROUND_TO_NEAREST)) - { - log_info("SKIP (no CL_FP_ROUND_TO_NEAREST): %s\n", - aec.func_name); - continue; + if (aec.requires_rte + && !(gFloatCapabilities & CL_FP_ROUND_TO_NEAREST)) + { + log_info("SKIP (no CL_FP_ROUND_TO_NEAREST): %s\n", + aec.func_name); + continue; + } } - } - EdgeCaseSpec ec = make_edge_case(aec); + EdgeCaseSpec ec = make_edge_case(aec); - if (run_edge_case(ec, context, queue) != CL_SUCCESS) - overall = -1; + if (run_edge_case(ec, context, queue) != CL_SUCCESS) + overall = -1; + } } + else + log_info("skipping float test\n"); + if (gHasHalf) { log_info("half\n"); for (std::size_t i = 0; i < count; ++i) { - auto aec = cases[i]; + auto &aec = cases[i]; if (aec.requires_denorm && !(gHalfCapabilities & CL_FP_DENORM)) { log_info("SKIP fp16 (no CL_FP_DENORM): %s\n", aec.func_name); @@ -660,13 +667,15 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, overall = -1; } } + else + log_info("skipping half test\n"); if (gHasDouble) { log_info("double\n"); for (std::size_t i = 0; i < count; ++i) { - auto aec = cases[i]; + auto &aec = cases[i]; if (aec.requires_denorm && !(gDoubleCapabilities & CL_FP_DENORM)) { @@ -694,6 +703,8 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, overall = -1; } } + else + log_info("skipping double test\n"); return overall; } @@ -702,6 +713,11 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, REGISTER_TEST(math_edge_cases) { + if (gSkipCorrectnessTesting) + { + log_info("Skipping math_edge_cases test\n"); + return TEST_SKIPPED_ITSELF; + } return run_edge_cases( edge_case_table, sizeof(edge_case_table) / sizeof((edge_case_table)[0]), gContext, gQueue); diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 4ae5a185da..9c2770b2a5 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -68,7 +68,7 @@ static bool gSkipRestOfTests; int gForceFTZ = 0; int gHostFill = 0; int gHasDouble = 0; -static int gTestFloat = 1; +int gTestFloat = 1; // This flag should be 'ON' by default and it can be changed through the command // line arguments. static int gTestFastRelaxed = 1; diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index f147a74dcd..16158133d2 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -65,6 +65,7 @@ extern int gHostFill; extern int gIsInRTZMode; extern int gHasHalf; extern int gHasDouble; +extern int gTestFloat; extern int gInfNanSupport; extern int gIsEmbedded; extern int gVerboseBruteForce; From dc2f1b26183404bc3327338befa4f07622a56a32 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 21 Apr 2026 13:07:56 +0200 Subject: [PATCH 3/7] more corrections due to code review of math_brute_force --- test_conformance/math_brute_force/edge_cases.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test_conformance/math_brute_force/edge_cases.cpp b/test_conformance/math_brute_force/edge_cases.cpp index 655aea2d84..834ea0de6d 100644 --- a/test_conformance/math_brute_force/edge_cases.cpp +++ b/test_conformance/math_brute_force/edge_cases.cpp @@ -600,7 +600,7 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, log_info("float test\n"); for (std::size_t i = 0; i < count; ++i) { - auto &aec = cases[i]; + const auto &aec = cases[i]; if (gIsEmbedded) { if (aec.requires_denorm && !(gFloatCapabilities & CL_FP_DENORM)) @@ -625,7 +625,7 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, } } - EdgeCaseSpec ec = make_edge_case(aec); + const EdgeCaseSpec ec = make_edge_case(aec); if (run_edge_case(ec, context, queue) != CL_SUCCESS) overall = -1; @@ -640,7 +640,7 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, log_info("half\n"); for (std::size_t i = 0; i < count; ++i) { - auto &aec = cases[i]; + const auto &aec = cases[i]; if (aec.requires_denorm && !(gHalfCapabilities & CL_FP_DENORM)) { log_info("SKIP fp16 (no CL_FP_DENORM): %s\n", aec.func_name); @@ -661,7 +661,7 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, continue; } - EdgeCaseSpec ec = make_edge_case(aec); + const EdgeCaseSpec ec = make_edge_case(aec); if (run_edge_case(ec, context, queue) != CL_SUCCESS) overall = -1; @@ -675,7 +675,7 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, log_info("double\n"); for (std::size_t i = 0; i < count; ++i) { - auto &aec = cases[i]; + const auto &aec = cases[i]; if (aec.requires_denorm && !(gDoubleCapabilities & CL_FP_DENORM)) { @@ -697,7 +697,7 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, continue; } - EdgeCaseSpec ec = make_edge_case(aec); + const EdgeCaseSpec ec = make_edge_case(aec); if (run_edge_case(ec, context, queue) != CL_SUCCESS) overall = -1; From 5bbab8d500454a7d9cd44db6487a33f68484fcb2 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 22 Apr 2026 14:48:35 +0200 Subject: [PATCH 4/7] edge-case kernels batched into groups --- .../math_brute_force/edge_cases.cpp | 270 +++++++++++------- 1 file changed, 173 insertions(+), 97 deletions(-) diff --git a/test_conformance/math_brute_force/edge_cases.cpp b/test_conformance/math_brute_force/edge_cases.cpp index 834ea0de6d..0d222d563c 100644 --- a/test_conformance/math_brute_force/edge_cases.cpp +++ b/test_conformance/math_brute_force/edge_cases.cpp @@ -105,6 +105,9 @@ struct EdgeCaseSpec bool expect_nan = false; }; +std::vector batch_cases; +std::string kernel_src; + struct AbstractValue { enum class Kind @@ -320,33 +323,6 @@ static const AbstractEdgeCase edge_case_table[] = { { "trunc", { AV_F(-0.25) }, NEG_ZERO, false, false, false, true }, }; -inline std::string build_kernel_source(const EdgeCaseSpec &ec) -{ - std::string s; - s += "__kernel void test_edge_case(\n"; - s += " __global "; - s += ec.expected.cl_type; - s += " *out"; - for (std::size_t i = 0; i < ec.inputs.size(); ++i) - { - s += ",\n __global const "; - s += ec.inputs[i].cl_type; - s += " *in"; - s += std::to_string(i); - } - s += ")\n{\n *out = "; - s += ec.func_name; - s += "("; - for (std::size_t i = 0; i < ec.inputs.size(); ++i) - { - if (i) s += ", "; - s += "*in"; - s += std::to_string(i); - } - s += ");\n}\n"; - return s; -} - void log_anyvalue(const AnyValue &v) { const std::size_t elem = [&] { @@ -382,12 +358,52 @@ void log_anyvalue(const AnyValue &v) } } -template -inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, - cl_command_queue queue) +inline void accumulate_edge_case(const EdgeCaseSpec &ec) { + std::string &src = kernel_src; + std::size_t ind = batch_cases.size(); // Build kernel - std::string src = build_kernel_source(ec); + if (src.empty()) + { + src += "__kernel void test_edge_case(\n"; + src += " __global "; + src += ec.expected.cl_type; + src += " *out"; + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + src += ",\n __global const "; + src += ec.inputs[i].cl_type; + src += " *in"; + src += std::to_string(i); + } + src += ")\n{"; + } + + src += "\n out["; + src += std::to_string(ind); + src += "] = "; + src += ec.func_name; + src += "("; + + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) src += ", "; + src += "in"; + src += std::to_string(i); + src += "["; + src += std::to_string(ind); + src += "]"; + } + src += ");"; + + batch_cases.push_back(ec); +} + +template +inline cl_int run_accumulated_cases(cl_context context, cl_command_queue queue) +{ + std::string &src = kernel_src; + src += "\n}\n"; if constexpr (std::is_same_v) src = std::string("#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n") + src; @@ -402,44 +418,59 @@ inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, "test_edge_case")) { log_error("ERROR: Failed to build kernel for '%s'\nSource:\n%s\n", - ec.func_name, src.c_str()); + batch_cases.front().func_name, src.c_str()); return TEST_FAIL; } cl_int err = CL_SUCCESS; clMemWrapper out_buf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - ec.expected.byte_size, nullptr, &err); + batch_cases.front().expected.byte_size + * batch_cases.size(), + nullptr, &err); if (err != CL_SUCCESS) { log_error("ERROR: clCreateBuffer (out) failed for '%s': %d\n", - ec.func_name, err); + batch_cases.front().func_name, err); return TEST_FAIL; } std::vector in_bufs; - in_bufs.reserve(ec.inputs.size()); + in_bufs.reserve(batch_cases.front().inputs.size()); - for (std::size_t i = 0; i < ec.inputs.size(); ++i) + for (std::size_t i = 0; i < batch_cases.front().inputs.size(); ++i) { cl_mem buf = clCreateBuffer(context, CL_MEM_READ_ONLY, - ec.inputs[i].byte_size, nullptr, &err); + batch_cases.front().inputs[i].byte_size + * batch_cases.size(), + nullptr, &err); if (err != CL_SUCCESS) { log_error("ERROR: clCreateBuffer (in%zu) failed for '%s': %d\n", i, - ec.func_name, err); + batch_cases.front().func_name, err); return TEST_FAIL; } in_bufs.push_back(buf); - err = - clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, ec.inputs[i].byte_size, - ec.inputs[i].data.data(), 0, nullptr, nullptr); + static std::vector inData; + inData.resize(batch_cases.front().inputs[i].byte_size + * batch_cases.size()); + + size_t byte_offset = 0; + for (auto &elem : batch_cases) + { + std::memcpy(&inData[byte_offset], elem.inputs[i].data.data(), + elem.inputs[i].byte_size); + byte_offset += elem.inputs[i].byte_size; + } + + err = clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, inData.size(), + inData.data(), 0, nullptr, nullptr); if (err != CL_SUCCESS) { log_error("ERROR: clEnqueueWriteBuffer (in%zu) failed for" " '%s': %d\n", - i, ec.func_name, err); + i, batch_cases.front().func_name, err); return TEST_FAIL; } } @@ -449,8 +480,8 @@ inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, err |= clSetKernelArg(kernel, i + 1, sizeof(cl_mem), &in_bufs[i]); if (err != CL_SUCCESS) { - log_error("ERROR: clSetKernelArg failed for '%s': %d\n", ec.func_name, - err); + log_error("ERROR: clSetKernelArg failed for '%s': %d\n", + batch_cases.front().func_name, err); return TEST_FAIL; } @@ -461,64 +492,76 @@ inline cl_int run_edge_case(const EdgeCaseSpec &ec, cl_context context, if (err != CL_SUCCESS) { log_error("ERROR: clEnqueueNDRangeKernel failed for '%s': %d\n", - ec.func_name, err); + batch_cases.front().func_name, err); return TEST_FAIL; } } { - std::array result{}; + static std::vector result; + result.resize(batch_cases.front().expected.byte_size + * batch_cases.size()); err = clEnqueueReadBuffer(queue, out_buf, CL_TRUE, 0, - ec.expected.byte_size, result.data(), 0, - nullptr, nullptr); + batch_cases.front().expected.byte_size + * batch_cases.size(), + result.data(), 0, nullptr, nullptr); if (err != CL_SUCCESS) { log_error("ERROR: clEnqueueReadBuffer failed for '%s': %d\n", - ec.func_name, err); + batch_cases.front().func_name, err); return TEST_FAIL; } - if (ec.expect_nan) + size_t byte_offset = 0; + for (const auto &ec : batch_cases) { - AnyValue got; - got.byte_size = ec.expected.byte_size; - std::memcpy(got.data.data(), result.data(), got.byte_size); - if (!got.all_elements_nan()) + if (ec.expect_nan) { - log_error("FAIL: %s(", ec.func_name); - for (std::size_t i = 0; i < ec.inputs.size(); ++i) + AnyValue got; + got.byte_size = ec.expected.byte_size; + std::memcpy(got.data.data(), &result[byte_offset], + got.byte_size); + if (!got.all_elements_nan()) { - if (i) log_error(", "); - log_anyvalue(ec.inputs[i]); + log_error("FAIL: %s(", ec.func_name); + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) log_error(", "); + log_anyvalue(ec.inputs[i]); + } + log_error(") - expected NaN, got 0x"); + + for (std::size_t i = ec.expected.byte_size; i !=0; --i) + log_error("%02x", result[byte_offset + i - 1]); + log_error("\n"); + err = -1; } - log_error(") - expected NaN, got 0x"); - for (std::size_t i = 0; i < ec.expected.byte_size; ++i) - log_error("%02x", result[i]); - log_error("\n"); - err = -1; } - } - else - { - if (std::memcmp(result.data(), ec.expected.data.data(), - ec.expected.byte_size) - != 0) + else { - log_error("FAIL: %s(", ec.func_name); - for (std::size_t i = 0; i < ec.inputs.size(); ++i) + if (std::memcmp(&result[byte_offset], ec.expected.data.data(), + ec.expected.byte_size) + != 0) { - if (i) log_error(", "); - log_anyvalue(ec.inputs[i]); + log_error("FAIL: %s(", ec.func_name); + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) log_error(", "); + log_anyvalue(ec.inputs[i]); + } + + log_error(") - expected "); + log_anyvalue(ec.expected); + + log_error(", got 0x"); + for (std::size_t i = ec.expected.byte_size; i !=0; --i) + log_error("%02x", result[byte_offset + i - 1]); + log_error("\n"); + err = -1; } - log_error(") - expected 0x"); - for (std::size_t i = 0; i < ec.expected.byte_size; ++i) - log_error("%02x", ec.expected.data[i]); - log_error(", got 0x"); - for (std::size_t i = 0; i < ec.expected.byte_size; ++i) - log_error("%02x", result[i]); - log_error("\n"); - err = -1; } + + byte_offset += ec.expected.byte_size; } } @@ -591,6 +634,23 @@ template EdgeCaseSpec make_edge_case(const AbstractEdgeCase &aec) return ec; } +template +cl_int flush_group (const AbstractEdgeCase *cases, std::size_t count, + cl_context context, cl_command_queue queue, std::size_t i) +{ + cl_int ret=0; + if (!batch_cases.empty() && + ((i == count - 1) || + std::strcmp(cases[i].func_name, cases[i+1].func_name) != 0)) + { + if (run_accumulated_cases(context, queue) != CL_SUCCESS) + ret = -1; + batch_cases.clear(); + kernel_src.clear(); + } + return ret; +} + inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, cl_context context, cl_command_queue queue) { @@ -598,22 +658,24 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, if (gTestFloat) { log_info("float test\n"); + for (std::size_t i = 0; i < count; ++i) { const auto &aec = cases[i]; + bool skip=false; if (gIsEmbedded) { if (aec.requires_denorm && !(gFloatCapabilities & CL_FP_DENORM)) { log_info("SKIP (no CL_FP_DENORM): %s\n", aec.func_name); - continue; + skip=true; } if (aec.requires_inf_nan && !(gFloatCapabilities & CL_FP_INF_NAN)) { log_info("SKIP (no CL_FP_INF_NAN): %s\n", aec.func_name); - continue; + skip=true; } if (aec.requires_rte @@ -621,13 +683,17 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, { log_info("SKIP (no CL_FP_ROUND_TO_NEAREST): %s\n", aec.func_name); - continue; + skip=true; } } - const EdgeCaseSpec ec = make_edge_case(aec); + if(!skip) + { + const EdgeCaseSpec ec = make_edge_case(aec); + accumulate_edge_case(ec); + } - if (run_edge_case(ec, context, queue) != CL_SUCCESS) + if(flush_group(cases, count, context, queue, i)!=CL_SUCCESS) overall = -1; } } @@ -641,16 +707,17 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, for (std::size_t i = 0; i < count; ++i) { const auto &aec = cases[i]; + bool skip=false; if (aec.requires_denorm && !(gHalfCapabilities & CL_FP_DENORM)) { log_info("SKIP fp16 (no CL_FP_DENORM): %s\n", aec.func_name); - continue; + skip=true; } if (aec.requires_inf_nan && !(gHalfCapabilities & CL_FP_INF_NAN)) { log_info("SKIP fp16 (no CL_FP_INF_NAN): %s\n", aec.func_name); - continue; + skip=true; } if (aec.requires_rte @@ -658,12 +725,17 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, { log_info("SKIP fp16 (no CL_FP_ROUND_TO_NEAREST): %s\n", aec.func_name); - continue; + skip=true; } - const EdgeCaseSpec ec = make_edge_case(aec); + if (!skip) + { + const EdgeCaseSpec ec = make_edge_case(aec); + accumulate_edge_case(ec); + } - if (run_edge_case(ec, context, queue) != CL_SUCCESS) + if (flush_group(cases, count, context, queue, i) + != CL_SUCCESS) overall = -1; } } @@ -676,17 +748,17 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, for (std::size_t i = 0; i < count; ++i) { const auto &aec = cases[i]; - + bool skip=false; if (aec.requires_denorm && !(gDoubleCapabilities & CL_FP_DENORM)) { log_info("SKIP fp64 (no CL_FP_DENORM): %s\n", aec.func_name); - continue; + skip=true; } if (aec.requires_inf_nan && !(gDoubleCapabilities & CL_FP_INF_NAN)) { log_info("SKIP fp64 (no CL_FP_INF_NAN): %s\n", aec.func_name); - continue; + skip=true; } if (aec.requires_rte @@ -694,12 +766,16 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, { log_info("SKIP fp64 (no CL_FP_ROUND_TO_NEAREST): %s\n", aec.func_name); - continue; + skip=true; } - const EdgeCaseSpec ec = make_edge_case(aec); + if(!skip) + { + const EdgeCaseSpec ec = make_edge_case(aec); + accumulate_edge_case(ec); + } - if (run_edge_case(ec, context, queue) != CL_SUCCESS) + if(flush_group(cases, count, context, queue, i)!=CL_SUCCESS) overall = -1; } } From 271d3fdb1f763190d2cec63d7279266009d5d535 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 23 Apr 2026 09:05:31 +0200 Subject: [PATCH 5/7] fix code format --- .../math_brute_force/edge_cases.cpp | 53 ++++++++++--------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/test_conformance/math_brute_force/edge_cases.cpp b/test_conformance/math_brute_force/edge_cases.cpp index 0d222d563c..ec96a1d7e4 100644 --- a/test_conformance/math_brute_force/edge_cases.cpp +++ b/test_conformance/math_brute_force/edge_cases.cpp @@ -531,7 +531,7 @@ inline cl_int run_accumulated_cases(cl_context context, cl_command_queue queue) } log_error(") - expected NaN, got 0x"); - for (std::size_t i = ec.expected.byte_size; i !=0; --i) + for (std::size_t i = ec.expected.byte_size; i != 0; --i) log_error("%02x", result[byte_offset + i - 1]); log_error("\n"); err = -1; @@ -554,7 +554,7 @@ inline cl_int run_accumulated_cases(cl_context context, cl_command_queue queue) log_anyvalue(ec.expected); log_error(", got 0x"); - for (std::size_t i = ec.expected.byte_size; i !=0; --i) + for (std::size_t i = ec.expected.byte_size; i != 0; --i) log_error("%02x", result[byte_offset + i - 1]); log_error("\n"); err = -1; @@ -635,16 +635,15 @@ template EdgeCaseSpec make_edge_case(const AbstractEdgeCase &aec) } template -cl_int flush_group (const AbstractEdgeCase *cases, std::size_t count, - cl_context context, cl_command_queue queue, std::size_t i) +cl_int flush_group(const AbstractEdgeCase *cases, std::size_t count, + cl_context context, cl_command_queue queue, std::size_t i) { - cl_int ret=0; - if (!batch_cases.empty() && - ((i == count - 1) || - std::strcmp(cases[i].func_name, cases[i+1].func_name) != 0)) + cl_int ret = 0; + if (!batch_cases.empty() + && ((i == count - 1) + || std::strcmp(cases[i].func_name, cases[i + 1].func_name) != 0)) { - if (run_accumulated_cases(context, queue) != CL_SUCCESS) - ret = -1; + if (run_accumulated_cases(context, queue) != CL_SUCCESS) ret = -1; batch_cases.clear(); kernel_src.clear(); } @@ -662,20 +661,20 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, for (std::size_t i = 0; i < count; ++i) { const auto &aec = cases[i]; - bool skip=false; + bool skip = false; if (gIsEmbedded) { if (aec.requires_denorm && !(gFloatCapabilities & CL_FP_DENORM)) { log_info("SKIP (no CL_FP_DENORM): %s\n", aec.func_name); - skip=true; + skip = true; } if (aec.requires_inf_nan && !(gFloatCapabilities & CL_FP_INF_NAN)) { log_info("SKIP (no CL_FP_INF_NAN): %s\n", aec.func_name); - skip=true; + skip = true; } if (aec.requires_rte @@ -683,17 +682,18 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, { log_info("SKIP (no CL_FP_ROUND_TO_NEAREST): %s\n", aec.func_name); - skip=true; + skip = true; } } - if(!skip) + if (!skip) { const EdgeCaseSpec ec = make_edge_case(aec); accumulate_edge_case(ec); } - if(flush_group(cases, count, context, queue, i)!=CL_SUCCESS) + if (flush_group(cases, count, context, queue, i) + != CL_SUCCESS) overall = -1; } } @@ -707,17 +707,17 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, for (std::size_t i = 0; i < count; ++i) { const auto &aec = cases[i]; - bool skip=false; + bool skip = false; if (aec.requires_denorm && !(gHalfCapabilities & CL_FP_DENORM)) { log_info("SKIP fp16 (no CL_FP_DENORM): %s\n", aec.func_name); - skip=true; + skip = true; } if (aec.requires_inf_nan && !(gHalfCapabilities & CL_FP_INF_NAN)) { log_info("SKIP fp16 (no CL_FP_INF_NAN): %s\n", aec.func_name); - skip=true; + skip = true; } if (aec.requires_rte @@ -725,7 +725,7 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, { log_info("SKIP fp16 (no CL_FP_ROUND_TO_NEAREST): %s\n", aec.func_name); - skip=true; + skip = true; } if (!skip) @@ -748,17 +748,17 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, for (std::size_t i = 0; i < count; ++i) { const auto &aec = cases[i]; - bool skip=false; + bool skip = false; if (aec.requires_denorm && !(gDoubleCapabilities & CL_FP_DENORM)) { log_info("SKIP fp64 (no CL_FP_DENORM): %s\n", aec.func_name); - skip=true; + skip = true; } if (aec.requires_inf_nan && !(gDoubleCapabilities & CL_FP_INF_NAN)) { log_info("SKIP fp64 (no CL_FP_INF_NAN): %s\n", aec.func_name); - skip=true; + skip = true; } if (aec.requires_rte @@ -766,16 +766,17 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, { log_info("SKIP fp64 (no CL_FP_ROUND_TO_NEAREST): %s\n", aec.func_name); - skip=true; + skip = true; } - if(!skip) + if (!skip) { const EdgeCaseSpec ec = make_edge_case(aec); accumulate_edge_case(ec); } - if(flush_group(cases, count, context, queue, i)!=CL_SUCCESS) + if (flush_group(cases, count, context, queue, i) + != CL_SUCCESS) overall = -1; } } From 89340db4edebf932150e9b8b0bd537480ce6ec83 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 24 Apr 2026 10:30:17 +0200 Subject: [PATCH 6/7] corrections due to code review related to grouped kernel execution --- test_conformance/math_brute_force/edge_cases.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test_conformance/math_brute_force/edge_cases.cpp b/test_conformance/math_brute_force/edge_cases.cpp index ec96a1d7e4..fb041790bc 100644 --- a/test_conformance/math_brute_force/edge_cases.cpp +++ b/test_conformance/math_brute_force/edge_cases.cpp @@ -378,6 +378,10 @@ inline void accumulate_edge_case(const EdgeCaseSpec &ec) } src += ")\n{"; } + else + { + assert(batch_cases.front().inputs.size() == ec.inputs.size()); + } src += "\n out["; src += std::to_string(ind); @@ -457,7 +461,7 @@ inline cl_int run_accumulated_cases(cl_context context, cl_command_queue queue) * batch_cases.size()); size_t byte_offset = 0; - for (auto &elem : batch_cases) + for (const auto &elem : batch_cases) { std::memcpy(&inData[byte_offset], elem.inputs[i].data.data(), elem.inputs[i].byte_size); @@ -656,7 +660,11 @@ inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, cl_int overall = CL_SUCCESS; if (gTestFloat) { - log_info("float test\n"); + log_info("float\n"); + + // Iterate over edge cases, grouping those with the same function name + // into a single kernel call to avoid per-case build overhead. The same + // pattern is applied for all three floating point precisions. for (std::size_t i = 0; i < count; ++i) { From 47ce955af329fb4317443ce1a16e1f62ef1a9a1d Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 30 Apr 2026 17:42:04 +0200 Subject: [PATCH 7/7] corrections due to code review --- .../math_brute_force/edge_cases.cpp | 785 +++++++++--------- 1 file changed, 406 insertions(+), 379 deletions(-) diff --git a/test_conformance/math_brute_force/edge_cases.cpp b/test_conformance/math_brute_force/edge_cases.cpp index fb041790bc..db0b56e838 100644 --- a/test_conformance/math_brute_force/edge_cases.cpp +++ b/test_conformance/math_brute_force/edge_cases.cpp @@ -105,9 +105,6 @@ struct EdgeCaseSpec bool expect_nan = false; }; -std::vector batch_cases; -std::string kernel_src; - struct AbstractValue { enum class Kind @@ -174,7 +171,7 @@ struct AbstractEdgeCase // Taken from OpenCL C Section 7.5.1. Additional Requirements Beyond C99 TC2 -static const AbstractEdgeCase edge_case_table[] = { +const AbstractEdgeCase edge_case_table[] = { { "acospi", { ONE }, POS_ZERO }, { "acospi", { AV_F(2) }, NAN_V, true, true }, @@ -323,476 +320,505 @@ static const AbstractEdgeCase edge_case_table[] = { { "trunc", { AV_F(-0.25) }, NEG_ZERO, false, false, false, true }, }; -void log_anyvalue(const AnyValue &v) +struct EdgeCasesTest { - const std::size_t elem = [&] { - if (v.cl_type == "double") return std::size_t(8); - if (v.cl_type == "float") return std::size_t(4); - if (v.cl_type == "int") return std::size_t(4); - return std::size_t(2); // half - }(); - - for (std::size_t off = 0; off < v.byte_size; off += elem) + std::vector batch_cases; + std::string kernel_src; + std::vector inData; + std::vector result; + + void log_anyvalue(const AnyValue &v) { - switch (elem) + const std::size_t elem = [&] { + if (v.cl_type == "double") return std::size_t(8); + if (v.cl_type == "float") return std::size_t(4); + if (v.cl_type == "int") return std::size_t(4); + return std::size_t(2); // half + }(); + + for (std::size_t off = 0; off < v.byte_size; off += elem) { - case 8: { - uint64_t bits; - std::memcpy(&bits, v.data.data() + off, 8); - log_error("0x%016" PRIx64, bits); - break; - } - case 4: { - uint32_t bits; - std::memcpy(&bits, v.data.data() + off, 4); - log_error("0x%08" PRIx32, bits); - break; - } - case 2: { - uint16_t bits; - std::memcpy(&bits, v.data.data() + off, 2); - log_error("0x%04" PRIx16, bits); - break; + switch (elem) + { + case 8: { + uint64_t bits; + std::memcpy(&bits, v.data.data() + off, 8); + log_error("0x%016" PRIx64, bits); + break; + } + case 4: { + uint32_t bits; + std::memcpy(&bits, v.data.data() + off, 4); + log_error("0x%08" PRIx32, bits); + break; + } + case 2: { + uint16_t bits; + std::memcpy(&bits, v.data.data() + off, 2); + log_error("0x%04" PRIx16, bits); + break; + } } } } -} -inline void accumulate_edge_case(const EdgeCaseSpec &ec) -{ - std::string &src = kernel_src; - std::size_t ind = batch_cases.size(); - // Build kernel - if (src.empty()) + inline void accumulate_edge_case(const EdgeCaseSpec &ec) { - src += "__kernel void test_edge_case(\n"; - src += " __global "; - src += ec.expected.cl_type; - src += " *out"; - for (std::size_t i = 0; i < ec.inputs.size(); ++i) + std::string &src = kernel_src; + std::size_t ind = batch_cases.size(); + // Build kernel + if (src.empty()) { - src += ",\n __global const "; - src += ec.inputs[i].cl_type; - src += " *in"; - src += std::to_string(i); + src += "__kernel void test_edge_case(\n"; + src += " __global "; + src += ec.expected.cl_type; + src += " *out"; + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + src += ",\n __global const "; + src += ec.inputs[i].cl_type; + src += " *in"; + src += std::to_string(i); + } + src += ")\n{"; + } + else + { + assert(batch_cases.front().inputs.size() == ec.inputs.size()); } - src += ")\n{"; - } - else - { - assert(batch_cases.front().inputs.size() == ec.inputs.size()); - } - - src += "\n out["; - src += std::to_string(ind); - src += "] = "; - src += ec.func_name; - src += "("; - for (std::size_t i = 0; i < ec.inputs.size(); ++i) - { - if (i) src += ", "; - src += "in"; - src += std::to_string(i); - src += "["; + src += "\n out["; src += std::to_string(ind); - src += "]"; - } - src += ");"; + src += "] = "; + src += ec.func_name; + src += "("; - batch_cases.push_back(ec); -} + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) src += ", "; + src += "in"; + src += std::to_string(i); + src += "["; + src += std::to_string(ind); + src += "]"; + } + src += ");"; -template -inline cl_int run_accumulated_cases(cl_context context, cl_command_queue queue) -{ - std::string &src = kernel_src; - src += "\n}\n"; - if constexpr (std::is_same_v) - src = std::string("#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n") - + src; - else if constexpr (std::is_same_v) - src = std::string("#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n") - + src; - const char *src_ptr = src.c_str(); - clProgramWrapper program; - clKernelWrapper kernel; - - if (create_single_kernel_helper(context, &program, &kernel, 1, &src_ptr, - "test_edge_case")) - { - log_error("ERROR: Failed to build kernel for '%s'\nSource:\n%s\n", - batch_cases.front().func_name, src.c_str()); - return TEST_FAIL; + batch_cases.push_back(ec); } - cl_int err = CL_SUCCESS; - - clMemWrapper out_buf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - batch_cases.front().expected.byte_size - * batch_cases.size(), - nullptr, &err); - if (err != CL_SUCCESS) + template + inline cl_int run_accumulated_cases(cl_context context, + cl_command_queue queue) { - log_error("ERROR: clCreateBuffer (out) failed for '%s': %d\n", - batch_cases.front().func_name, err); - return TEST_FAIL; - } + std::string &src = kernel_src; + src += "\n}\n"; + if constexpr (std::is_same_v) + src = std::string("#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n") + + src; + else if constexpr (std::is_same_v) + src = std::string("#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n") + + src; + const char *src_ptr = src.c_str(); + clProgramWrapper program; + clKernelWrapper kernel; + + if (create_single_kernel_helper(context, &program, &kernel, 1, &src_ptr, + "test_edge_case")) + { + log_error("ERROR: Failed to build kernel for '%s'\nSource:\n%s\n", + batch_cases.front().func_name, src.c_str()); + return TEST_FAIL; + } - std::vector in_bufs; - in_bufs.reserve(batch_cases.front().inputs.size()); + cl_int err = CL_SUCCESS; - for (std::size_t i = 0; i < batch_cases.front().inputs.size(); ++i) - { - cl_mem buf = clCreateBuffer(context, CL_MEM_READ_ONLY, - batch_cases.front().inputs[i].byte_size - * batch_cases.size(), - nullptr, &err); + clMemWrapper out_buf = clCreateBuffer( + context, CL_MEM_WRITE_ONLY, + batch_cases.front().expected.byte_size * batch_cases.size(), + nullptr, &err); if (err != CL_SUCCESS) { - log_error("ERROR: clCreateBuffer (in%zu) failed for '%s': %d\n", i, + log_error("ERROR: clCreateBuffer (out) failed for '%s': %d\n", batch_cases.front().func_name, err); return TEST_FAIL; } - in_bufs.push_back(buf); - static std::vector inData; - inData.resize(batch_cases.front().inputs[i].byte_size - * batch_cases.size()); + std::vector in_bufs; + in_bufs.reserve(batch_cases.front().inputs.size()); - size_t byte_offset = 0; - for (const auto &elem : batch_cases) + for (std::size_t i = 0; i < batch_cases.front().inputs.size(); ++i) { - std::memcpy(&inData[byte_offset], elem.inputs[i].data.data(), - elem.inputs[i].byte_size); - byte_offset += elem.inputs[i].byte_size; - } + cl_mem buf = clCreateBuffer(context, CL_MEM_READ_ONLY, + batch_cases.front().inputs[i].byte_size + * batch_cases.size(), + nullptr, &err); + if (err != CL_SUCCESS) + { + log_error("ERROR: clCreateBuffer (in%zu) failed for '%s': %d\n", + i, batch_cases.front().func_name, err); + return TEST_FAIL; + } + in_bufs.push_back(buf); - err = clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, inData.size(), - inData.data(), 0, nullptr, nullptr); - if (err != CL_SUCCESS) - { - log_error("ERROR: clEnqueueWriteBuffer (in%zu) failed for" - " '%s': %d\n", - i, batch_cases.front().func_name, err); - return TEST_FAIL; - } - } + inData.resize(batch_cases.front().inputs[i].byte_size + * batch_cases.size()); - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &out_buf); - for (cl_uint i = 0; i < static_cast(in_bufs.size()); ++i) - err |= clSetKernelArg(kernel, i + 1, sizeof(cl_mem), &in_bufs[i]); - if (err != CL_SUCCESS) - { - log_error("ERROR: clSetKernelArg failed for '%s': %d\n", - batch_cases.front().func_name, err); - return TEST_FAIL; - } + size_t byte_offset = 0; + for (const auto &elem : batch_cases) + { + std::memcpy(&inData[byte_offset], elem.inputs[i].data.data(), + elem.inputs[i].byte_size); + byte_offset += elem.inputs[i].byte_size; + } - { - const std::size_t gws = 1; - err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &gws, nullptr, - 0, nullptr, nullptr); + err = clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, inData.size(), + inData.data(), 0, nullptr, nullptr); + if (err != CL_SUCCESS) + { + log_error("ERROR: clEnqueueWriteBuffer (in%zu) failed for" + " '%s': %d\n", + i, batch_cases.front().func_name, err); + return TEST_FAIL; + } + } + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &out_buf); + for (cl_uint i = 0; i < static_cast(in_bufs.size()); ++i) + err |= clSetKernelArg(kernel, i + 1, sizeof(cl_mem), &in_bufs[i]); if (err != CL_SUCCESS) { - log_error("ERROR: clEnqueueNDRangeKernel failed for '%s': %d\n", + log_error("ERROR: clSetKernelArg failed for '%s': %d\n", batch_cases.front().func_name, err); return TEST_FAIL; } - } - { - static std::vector result; - result.resize(batch_cases.front().expected.byte_size - * batch_cases.size()); - err = clEnqueueReadBuffer(queue, out_buf, CL_TRUE, 0, - batch_cases.front().expected.byte_size - * batch_cases.size(), - result.data(), 0, nullptr, nullptr); - if (err != CL_SUCCESS) { - log_error("ERROR: clEnqueueReadBuffer failed for '%s': %d\n", - batch_cases.front().func_name, err); - return TEST_FAIL; + const std::size_t gws = 1; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &gws, + nullptr, 0, nullptr, nullptr); + if (err != CL_SUCCESS) + { + log_error("ERROR: clEnqueueNDRangeKernel failed for '%s': %d\n", + batch_cases.front().func_name, err); + return TEST_FAIL; + } } - size_t byte_offset = 0; - for (const auto &ec : batch_cases) { - if (ec.expect_nan) + result.resize(batch_cases.front().expected.byte_size + * batch_cases.size()); + err = clEnqueueReadBuffer(queue, out_buf, CL_TRUE, 0, + batch_cases.front().expected.byte_size + * batch_cases.size(), + result.data(), 0, nullptr, nullptr); + if (err != CL_SUCCESS) + { + log_error("ERROR: clEnqueueReadBuffer failed for '%s': %d\n", + batch_cases.front().func_name, err); + return TEST_FAIL; + } + + size_t byte_offset = 0; + for (const auto &ec : batch_cases) { - AnyValue got; - got.byte_size = ec.expected.byte_size; - std::memcpy(got.data.data(), &result[byte_offset], - got.byte_size); - if (!got.all_elements_nan()) + if (ec.expect_nan) { - log_error("FAIL: %s(", ec.func_name); - for (std::size_t i = 0; i < ec.inputs.size(); ++i) + AnyValue got; + got.byte_size = ec.expected.byte_size; + std::memcpy(got.data.data(), &result[byte_offset], + got.byte_size); + if (!got.all_elements_nan()) { - if (i) log_error(", "); - log_anyvalue(ec.inputs[i]); + log_error("FAIL: %s(", ec.func_name); + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) log_error(", "); + log_anyvalue(ec.inputs[i]); + } + log_error(") - expected NaN, got 0x"); + + for (std::size_t i = ec.expected.byte_size; i != 0; --i) + log_error("%02x", result[byte_offset + i - 1]); + log_error("\n"); + err = -1; } - log_error(") - expected NaN, got 0x"); - - for (std::size_t i = ec.expected.byte_size; i != 0; --i) - log_error("%02x", result[byte_offset + i - 1]); - log_error("\n"); - err = -1; } - } - else - { - if (std::memcmp(&result[byte_offset], ec.expected.data.data(), - ec.expected.byte_size) - != 0) + else { - log_error("FAIL: %s(", ec.func_name); - for (std::size_t i = 0; i < ec.inputs.size(); ++i) + if (std::memcmp(&result[byte_offset], + ec.expected.data.data(), + ec.expected.byte_size) + != 0) { - if (i) log_error(", "); - log_anyvalue(ec.inputs[i]); + log_error("FAIL: %s(", ec.func_name); + for (std::size_t i = 0; i < ec.inputs.size(); ++i) + { + if (i) log_error(", "); + log_anyvalue(ec.inputs[i]); + } + + log_error(") - expected "); + log_anyvalue(ec.expected); + + log_error(", got 0x"); + for (std::size_t i = ec.expected.byte_size; i != 0; --i) + log_error("%02x", result[byte_offset + i - 1]); + log_error("\n"); + err = -1; } - - log_error(") - expected "); - log_anyvalue(ec.expected); - - log_error(", got 0x"); - for (std::size_t i = ec.expected.byte_size; i != 0; --i) - log_error("%02x", result[byte_offset + i - 1]); - log_error("\n"); - err = -1; } - } - byte_offset += ec.expected.byte_size; + byte_offset += ec.expected.byte_size; + } } - } - return err == -1 ? TEST_FAIL : TEST_PASS; -} - -template AnyValue abstract_to_anyvalue(const AbstractValue &av) -{ - if (av.kind == AbstractValue::Kind::Int) - return AnyValue::make(av.i); + return err == -1 ? TEST_FAIL : TEST_PASS; + } - if constexpr (std::is_same_v) + template AnyValue abstract_to_anyvalue(const AbstractValue &av) { - uint16_t bits = 0; - switch (av.kind) + if (av.kind == AbstractValue::Kind::Int) + return AnyValue::make(av.i); + + if constexpr (std::is_same_v) { - case AbstractValue::Kind::PosZero: bits = 0x0000; break; - case AbstractValue::Kind::NegZero: bits = 0x8000; break; - case AbstractValue::Kind::PosInf: bits = 0x7C00; break; - case AbstractValue::Kind::NegInf: bits = 0xFC00; break; - case AbstractValue::Kind::NaN: bits = 0x7E00; break; - case AbstractValue::Kind::SmallestPosDenorm: bits = 0x0001; break; - case AbstractValue::Kind::SmallestNegDenorm: bits = 0x8001; break; - case AbstractValue::Kind::Finite: - bits = - cl_half_from_float(static_cast(av.d), CL_HALF_RTE); - break; - default: break; + uint16_t bits = 0; + switch (av.kind) + { + case AbstractValue::Kind::PosZero: bits = 0x0000; break; + case AbstractValue::Kind::NegZero: bits = 0x8000; break; + case AbstractValue::Kind::PosInf: bits = 0x7C00; break; + case AbstractValue::Kind::NegInf: bits = 0xFC00; break; + case AbstractValue::Kind::NaN: bits = 0x7E00; break; + case AbstractValue::Kind::SmallestPosDenorm: + bits = 0x0001; + break; + case AbstractValue::Kind::SmallestNegDenorm: + bits = 0x8001; + break; + case AbstractValue::Kind::Finite: + bits = cl_half_from_float(static_cast(av.d), + CL_HALF_RTE); + break; + default: break; + } + return AnyValue::make(bits); } - return AnyValue::make(bits); - } - else - { - T val{}; - switch (av.kind) + else { - case AbstractValue::Kind::PosZero: val = T(0); break; - case AbstractValue::Kind::NegZero: val = -T(0); break; - case AbstractValue::Kind::PosInf: - val = std::numeric_limits::infinity(); - break; - case AbstractValue::Kind::NegInf: - val = -std::numeric_limits::infinity(); - break; - case AbstractValue::Kind::NaN: - val = std::numeric_limits::quiet_NaN(); - break; - case AbstractValue::Kind::Finite: val = static_cast(av.d); break; - case AbstractValue::Kind::SmallestPosDenorm: - val = std::numeric_limits::denorm_min(); - break; - case AbstractValue::Kind::SmallestNegDenorm: - val = -std::numeric_limits::denorm_min(); - break; - default: break; + T val{}; + switch (av.kind) + { + case AbstractValue::Kind::PosZero: val = T(0); break; + case AbstractValue::Kind::NegZero: val = -T(0); break; + case AbstractValue::Kind::PosInf: + val = std::numeric_limits::infinity(); + break; + case AbstractValue::Kind::NegInf: + val = -std::numeric_limits::infinity(); + break; + case AbstractValue::Kind::NaN: + val = std::numeric_limits::quiet_NaN(); + break; + case AbstractValue::Kind::Finite: + val = static_cast(av.d); + break; + case AbstractValue::Kind::SmallestPosDenorm: + val = std::numeric_limits::denorm_min(); + break; + case AbstractValue::Kind::SmallestNegDenorm: + val = -std::numeric_limits::denorm_min(); + break; + default: break; + } + return AnyValue::make(val); } - return AnyValue::make(val); } -} -template EdgeCaseSpec make_edge_case(const AbstractEdgeCase &aec) -{ - EdgeCaseSpec ec; - ec.expect_nan = aec.expect_nan; - ec.expected = abstract_to_anyvalue(aec.expected); - ec.inputs.reserve(aec.inputs.size()); - for (const auto &av : aec.inputs) - ec.inputs.push_back(abstract_to_anyvalue(av)); - ec.func_name = aec.func_name; - return ec; -} + template + EdgeCaseSpec make_edge_case(const AbstractEdgeCase &aec) + { + EdgeCaseSpec ec; + ec.expect_nan = aec.expect_nan; + ec.expected = abstract_to_anyvalue(aec.expected); + ec.inputs.reserve(aec.inputs.size()); + for (const auto &av : aec.inputs) + ec.inputs.push_back(abstract_to_anyvalue(av)); + ec.func_name = aec.func_name; + return ec; + } -template -cl_int flush_group(const AbstractEdgeCase *cases, std::size_t count, - cl_context context, cl_command_queue queue, std::size_t i) -{ - cl_int ret = 0; - if (!batch_cases.empty() - && ((i == count - 1) - || std::strcmp(cases[i].func_name, cases[i + 1].func_name) != 0)) + template + cl_int flush_group(const AbstractEdgeCase *cases, std::size_t count, + cl_context context, cl_command_queue queue, + std::size_t i) { - if (run_accumulated_cases(context, queue) != CL_SUCCESS) ret = -1; - batch_cases.clear(); - kernel_src.clear(); + cl_int ret = 0; + if (!batch_cases.empty() + && ((i == count - 1) + || std::strcmp(cases[i].func_name, cases[i + 1].func_name) + != 0)) + { + if (run_accumulated_cases(context, queue) != CL_SUCCESS) + ret = -1; + batch_cases.clear(); + kernel_src.clear(); + } + return ret; } - return ret; -} -inline cl_int run_edge_cases(const AbstractEdgeCase *cases, std::size_t count, - cl_context context, cl_command_queue queue) -{ - cl_int overall = CL_SUCCESS; - if (gTestFloat) + inline cl_int run_edge_cases(const AbstractEdgeCase *cases, + std::size_t count, cl_context context, + cl_command_queue queue) { - log_info("float\n"); + cl_int overall = CL_SUCCESS; + if (gTestFloat) + { + log_info("float\n"); + + // Iterate over edge cases, grouping those with the same function + // name into a single kernel call to avoid per-case build overhead. + // The same pattern is applied for all three floating point + // precisions. - // Iterate over edge cases, grouping those with the same function name - // into a single kernel call to avoid per-case build overhead. The same - // pattern is applied for all three floating point precisions. + for (std::size_t i = 0; i < count; ++i) + { + const auto &aec = cases[i]; + bool skip = false; + if (gIsEmbedded) + { + if (aec.requires_denorm + && !(gFloatCapabilities & CL_FP_DENORM)) + { + log_info("SKIP (no CL_FP_DENORM): %s\n", aec.func_name); + skip = true; + } - for (std::size_t i = 0; i < count; ++i) + if (aec.requires_inf_nan + && !(gFloatCapabilities & CL_FP_INF_NAN)) + { + log_info("SKIP (no CL_FP_INF_NAN): %s\n", + aec.func_name); + skip = true; + } + + if (aec.requires_rte + && !(gFloatCapabilities & CL_FP_ROUND_TO_NEAREST)) + { + log_info("SKIP (no CL_FP_ROUND_TO_NEAREST): %s\n", + aec.func_name); + skip = true; + } + } + + if (!skip) + { + const EdgeCaseSpec ec = make_edge_case(aec); + accumulate_edge_case(ec); + } + + if (flush_group(cases, count, context, queue, i) + != CL_SUCCESS) + overall = -1; + } + } + else + log_info("skipping float test\n"); + + + if (gHasHalf) { - const auto &aec = cases[i]; - bool skip = false; - if (gIsEmbedded) + log_info("half\n"); + for (std::size_t i = 0; i < count; ++i) { - if (aec.requires_denorm && !(gFloatCapabilities & CL_FP_DENORM)) + const auto &aec = cases[i]; + bool skip = false; + if (aec.requires_denorm && !(gHalfCapabilities & CL_FP_DENORM)) { - log_info("SKIP (no CL_FP_DENORM): %s\n", aec.func_name); + log_info("SKIP fp16 (no CL_FP_DENORM): %s\n", + aec.func_name); skip = true; } if (aec.requires_inf_nan - && !(gFloatCapabilities & CL_FP_INF_NAN)) + && !(gHalfCapabilities & CL_FP_INF_NAN)) { - log_info("SKIP (no CL_FP_INF_NAN): %s\n", aec.func_name); + log_info("SKIP fp16 (no CL_FP_INF_NAN): %s\n", + aec.func_name); skip = true; } if (aec.requires_rte - && !(gFloatCapabilities & CL_FP_ROUND_TO_NEAREST)) + && !(gHalfCapabilities & CL_FP_ROUND_TO_NEAREST)) { - log_info("SKIP (no CL_FP_ROUND_TO_NEAREST): %s\n", + log_info("SKIP fp16 (no CL_FP_ROUND_TO_NEAREST): %s\n", aec.func_name); skip = true; } - } - - if (!skip) - { - const EdgeCaseSpec ec = make_edge_case(aec); - accumulate_edge_case(ec); - } - - if (flush_group(cases, count, context, queue, i) - != CL_SUCCESS) - overall = -1; - } - } - else - log_info("skipping float test\n"); - - if (gHasHalf) - { - log_info("half\n"); - for (std::size_t i = 0; i < count; ++i) - { - const auto &aec = cases[i]; - bool skip = false; - if (aec.requires_denorm && !(gHalfCapabilities & CL_FP_DENORM)) - { - log_info("SKIP fp16 (no CL_FP_DENORM): %s\n", aec.func_name); - skip = true; - } - - if (aec.requires_inf_nan && !(gHalfCapabilities & CL_FP_INF_NAN)) - { - log_info("SKIP fp16 (no CL_FP_INF_NAN): %s\n", aec.func_name); - skip = true; - } - - if (aec.requires_rte - && !(gHalfCapabilities & CL_FP_ROUND_TO_NEAREST)) - { - log_info("SKIP fp16 (no CL_FP_ROUND_TO_NEAREST): %s\n", - aec.func_name); - skip = true; - } + if (!skip) + { + const EdgeCaseSpec ec = make_edge_case(aec); + accumulate_edge_case(ec); + } - if (!skip) - { - const EdgeCaseSpec ec = make_edge_case(aec); - accumulate_edge_case(ec); + if (flush_group(cases, count, context, queue, i) + != CL_SUCCESS) + overall = -1; } - - if (flush_group(cases, count, context, queue, i) - != CL_SUCCESS) - overall = -1; } - } - else - log_info("skipping half test\n"); + else + log_info("skipping half test\n"); - if (gHasDouble) - { - log_info("double\n"); - for (std::size_t i = 0; i < count; ++i) + if (gHasDouble) { - const auto &aec = cases[i]; - bool skip = false; - if (aec.requires_denorm && !(gDoubleCapabilities & CL_FP_DENORM)) + log_info("double\n"); + for (std::size_t i = 0; i < count; ++i) { - log_info("SKIP fp64 (no CL_FP_DENORM): %s\n", aec.func_name); - skip = true; - } + const auto &aec = cases[i]; + bool skip = false; + if (aec.requires_denorm + && !(gDoubleCapabilities & CL_FP_DENORM)) + { + log_info("SKIP fp64 (no CL_FP_DENORM): %s\n", + aec.func_name); + skip = true; + } - if (aec.requires_inf_nan && !(gDoubleCapabilities & CL_FP_INF_NAN)) - { - log_info("SKIP fp64 (no CL_FP_INF_NAN): %s\n", aec.func_name); - skip = true; - } + if (aec.requires_inf_nan + && !(gDoubleCapabilities & CL_FP_INF_NAN)) + { + log_info("SKIP fp64 (no CL_FP_INF_NAN): %s\n", + aec.func_name); + skip = true; + } - if (aec.requires_rte - && !(gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST)) - { - log_info("SKIP fp64 (no CL_FP_ROUND_TO_NEAREST): %s\n", - aec.func_name); - skip = true; - } + if (aec.requires_rte + && !(gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST)) + { + log_info("SKIP fp64 (no CL_FP_ROUND_TO_NEAREST): %s\n", + aec.func_name); + skip = true; + } - if (!skip) - { - const EdgeCaseSpec ec = make_edge_case(aec); - accumulate_edge_case(ec); - } + if (!skip) + { + const EdgeCaseSpec ec = make_edge_case(aec); + accumulate_edge_case(ec); + } - if (flush_group(cases, count, context, queue, i) - != CL_SUCCESS) - overall = -1; + if (flush_group(cases, count, context, queue, i) + != CL_SUCCESS) + overall = -1; + } } - } - else - log_info("skipping double test\n"); + else + log_info("skipping double test\n"); - return overall; -} + return overall; + } +}; } // anonymous namespace @@ -803,7 +829,8 @@ REGISTER_TEST(math_edge_cases) log_info("Skipping math_edge_cases test\n"); return TEST_SKIPPED_ITSELF; } - return run_edge_cases( + EdgeCasesTest edge_cases_test; + return edge_cases_test.run_edge_cases( edge_case_table, sizeof(edge_case_table) / sizeof((edge_case_table)[0]), gContext, gQueue); }