Skip to content

Commit 0cec4a4

Browse files
committed
fix(non_uniform_work_group): respect per-dimension work item size limits when selecting localSize
Problem: Test cases selected localSize based only on CL_KERNEL_WORK_GROUP_SIZE, without considering the per-dimension limits in CL_DEVICE_MAX_WORK_ITEM_SIZES. This caused issues because:
1. The constructor rounds globalSize based on the original localSize.
2. prepareDevice() later trims _enqueuedLocalSize to the device limits.
3. Result: globalSize was rounded with the wrong localSize, causing a mismatch.

For example, with maxWorkItemSizes=[256,256,64] and localSize={512,1}:
- globalSize is rounded assuming localSize[0]=512
- prepareDevice() limits localSize[0] to 256
- globalSize and localSize no longer match

Fix: Query CL_DEVICE_MAX_WORK_ITEM_SIZES in the test cases and compute effectiveMax = min(maxWgSize, maxWorkItemSizes[dim]) before selecting localSize for each dimension. This ensures localSize is valid before rounding, so globalSize and localSize remain consistent throughout the test.

Affected files:
- test_advanced_2d.cpp
- test_advanced_3d.cpp
- test_advanced_other.cpp
- test_basic.cpp

Signed-off-by: jiajia Qian <jiajia.qian@nxp.com>
1 parent d00c0c3 commit 0cec4a4

3 files changed

Lines changed: 72 additions & 18 deletions

File tree

test_conformance/non_uniform_work_group/test_advanced_2d.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,14 @@ REGISTER_TEST(non_uniform_2d_basic)
139139

140140
// non_uniform_2d_three_prime_numbers_basic
141141
{
142+
size_t maxWorkItemSizes[3];
143+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
144+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
145+
146+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
147+
142148
size_t primeNumber =
143-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
149+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
144150
if (primeNumber < 1)
145151
{
146152
log_error("Cannot find proper prime number.");
@@ -149,7 +155,7 @@ REGISTER_TEST(non_uniform_2d_basic)
149155
size_t primeNumber2 = 42967;
150156
size_t primeNumber3 = 13;
151157
size_t globalSize[] = { primeNumber2, primeNumber3 };
152-
size_t localSize[] = { primeNumber, 1 };
158+
size_t localSize[] = {(size_t) primeNumber, 1 };
153159

154160
exec.runTestNonUniformWorkGroup(sizeof(globalSize)
155161
/ sizeof(globalSize[0]),
@@ -281,8 +287,14 @@ REGISTER_TEST(non_uniform_2d_atomics)
281287

282288
// non_uniform_2d_three_prime_numbers_atomics
283289
{
290+
size_t maxWorkItemSizes[3];
291+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
292+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
293+
294+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
295+
284296
size_t primeNumber =
285-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
297+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
286298
if (primeNumber < 1)
287299
{
288300
log_error("Cannot find proper prime number.");
@@ -291,7 +303,7 @@ REGISTER_TEST(non_uniform_2d_atomics)
291303
size_t primeNumber2 = 42967;
292304
size_t primeNumber3 = 13;
293305
size_t globalSize[] = { primeNumber2, primeNumber3 };
294-
size_t localSize[] = { primeNumber, 1 };
306+
size_t localSize[] = {(size_t) primeNumber, 1 };
295307

296308
exec.runTestNonUniformWorkGroup(sizeof(globalSize)
297309
/ sizeof(globalSize[0]),
@@ -421,8 +433,14 @@ REGISTER_TEST(non_uniform_2d_barriers)
421433

422434
// non_uniform_2d_three_prime_numbers_barriers
423435
{
436+
size_t maxWorkItemSizes[3];
437+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
438+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
439+
440+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
441+
424442
size_t primeNumber =
425-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
443+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
426444
if (primeNumber < 1)
427445
{
428446
log_error("Cannot find proper prime number.");
@@ -431,7 +449,7 @@ REGISTER_TEST(non_uniform_2d_barriers)
431449
size_t primeNumber2 = 42967;
432450
size_t primeNumber3 = 13;
433451
size_t globalSize[] = { primeNumber2, primeNumber3 };
434-
size_t localSize[] = { primeNumber, 1 };
452+
size_t localSize[] = {(size_t) primeNumber, 1 };
435453

436454
exec.runTestNonUniformWorkGroup(sizeof(globalSize)
437455
/ sizeof(globalSize[0]),

test_conformance/non_uniform_work_group/test_advanced_3d.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,14 @@ REGISTER_TEST(non_uniform_3d_basic)
112112

113113
// non_uniform_3d_three_prime_numbers_basic
114114
{
115+
size_t maxWorkItemSizes[3];
116+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
117+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
118+
119+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
120+
115121
size_t primeNumber =
116-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
122+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
117123
if (primeNumber < 1)
118124
{
119125
log_error("Cannot find proper prime number.");
@@ -122,7 +128,7 @@ REGISTER_TEST(non_uniform_3d_basic)
122128
size_t primeNumber2 = 10711;
123129
size_t primeNumber3 = 13;
124130
size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 };
125-
size_t localSize[] = { primeNumber, 1, 1 };
131+
size_t localSize[] = {(size_t) primeNumber, 1, 1 };
126132

127133
exec.runTestNonUniformWorkGroup(sizeof(globalSize)
128134
/ sizeof(globalSize[0]),
@@ -266,8 +272,14 @@ REGISTER_TEST(non_uniform_3d_atomics)
266272

267273
// non_uniform_3d_three_prime_numbers_atomics
268274
{
275+
size_t maxWorkItemSizes[3];
276+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
277+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
278+
279+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
280+
269281
size_t primeNumber =
270-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
282+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
271283
if (primeNumber < 1)
272284
{
273285
log_error("Cannot find proper prime number.");
@@ -276,7 +288,7 @@ REGISTER_TEST(non_uniform_3d_atomics)
276288
size_t primeNumber2 = 10711;
277289
size_t primeNumber3 = 13;
278290
size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 };
279-
size_t localSize[] = { primeNumber, 1, 1 };
291+
size_t localSize[] = {(size_t) primeNumber, 1, 1 };
280292

281293
exec.runTestNonUniformWorkGroup(sizeof(globalSize)
282294
/ sizeof(globalSize[0]),
@@ -420,8 +432,14 @@ REGISTER_TEST(non_uniform_3d_barriers)
420432

421433
// non_uniform_3d_three_prime_numbers_barriers
422434
{
435+
size_t maxWorkItemSizes[3];
436+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
437+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
438+
439+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
440+
423441
size_t primeNumber =
424-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
442+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
425443
if (primeNumber < 1)
426444
{
427445
log_error("Cannot find proper prime number.");
@@ -430,7 +448,7 @@ REGISTER_TEST(non_uniform_3d_barriers)
430448
size_t primeNumber2 = 10711;
431449
size_t primeNumber3 = 13;
432450
size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 };
433-
size_t localSize[] = { primeNumber, 1, 1 };
451+
size_t localSize[] = {(size_t) primeNumber, 1, 1 };
434452

435453
exec.runTestNonUniformWorkGroup(sizeof(globalSize)
436454
/ sizeof(globalSize[0]),

test_conformance/non_uniform_work_group/test_advanced_other.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,14 @@ REGISTER_TEST(non_uniform_other_basic)
4848

4949
// non_uniform_2d_three_prime_numbers_offset_basic
5050
{
51+
size_t maxWorkItemSizes[3];
52+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
53+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
54+
55+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
56+
5157
size_t primeNumber =
52-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
58+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
5359
if (primeNumber < 1)
5460
{
5561
log_error("Cannot find proper prime number.");
@@ -58,7 +64,7 @@ REGISTER_TEST(non_uniform_other_basic)
5864
size_t primeNumber2 = 42967;
5965
size_t primeNumber3 = 13;
6066
size_t globalSize[] = { primeNumber2, primeNumber3 };
61-
size_t localSize[] = { primeNumber, 1 };
67+
size_t localSize[] = {(size_t) primeNumber, 1 };
6268
size_t offset[] = { 23, 17 };
6369

6470
exec.runTestNonUniformWorkGroup(
@@ -149,8 +155,14 @@ REGISTER_TEST(non_uniform_other_atomics)
149155

150156
// non_uniform_2d_three_prime_numbers_offset_atomics
151157
{
158+
size_t maxWorkItemSizes[3];
159+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
160+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
161+
162+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
163+
152164
size_t primeNumber =
153-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
165+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
154166
if (primeNumber < 1)
155167
{
156168
log_error("Cannot find proper prime number.");
@@ -159,7 +171,7 @@ REGISTER_TEST(non_uniform_other_atomics)
159171
size_t primeNumber2 = 42967;
160172
size_t primeNumber3 = 13;
161173
size_t globalSize[] = { primeNumber2, primeNumber3 };
162-
size_t localSize[] = { primeNumber, 1 };
174+
size_t localSize[] = {(size_t) primeNumber, 1 };
163175
size_t offset[] = { 23, 17 };
164176

165177
exec.runTestNonUniformWorkGroup(
@@ -249,8 +261,14 @@ REGISTER_TEST(non_uniform_other_barriers)
249261

250262
// non_uniform_2d_three_prime_numbers_offset_barriers
251263
{
264+
size_t maxWorkItemSizes[3];
265+
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
266+
sizeof(maxWorkItemSizes), maxWorkItemSizes, NULL);
267+
268+
size_t effectiveMax = std::min(maxWgSize, maxWorkItemSizes[0]);
269+
252270
size_t primeNumber =
253-
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize);
271+
PrimeNumbers::getPrimeNumberInRange(effectiveMax / 2, effectiveMax);
254272
if (primeNumber < 1)
255273
{
256274
log_error("Cannot find proper prime number.");
@@ -259,7 +277,7 @@ REGISTER_TEST(non_uniform_other_barriers)
259277
size_t primeNumber2 = 42967;
260278
size_t primeNumber3 = 13;
261279
size_t globalSize[] = { primeNumber2, primeNumber3 };
262-
size_t localSize[] = { primeNumber, 1 };
280+
size_t localSize[] = {(size_t) primeNumber, 1 };
263281
size_t offset[] = { 23, 17 };
264282

265283
exec.runTestNonUniformWorkGroup(

0 commit comments

Comments
 (0)