Skip to content

Commit 01460d8

Browse files
committed
add aluperf sample
1 parent 6e8a40a commit 01460d8

3 files changed

Lines changed: 169 additions & 0 deletions

File tree

samples/99_aluperf/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (c) 2022 Ben Ashbaugh
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
# Registers the aluperf sample with the build.  The keyword arguments are
# interpreted by the project's add_opencl_sample() helper, which is defined
# elsewhere in the repository.
# NOTE(review): VERSION 120 presumably selects the OpenCL 1.2 API - confirm
# against the helper's definition.
add_opencl_sample(TEST
    NUMBER  99
    TARGET  aluperf
    VERSION 120
    SOURCES main.cpp
)

samples/99_aluperf/main.cpp

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/*
2+
// Copyright (c) 2022 Ben Ashbaugh
3+
//
4+
// SPDX-License-Identifier: MIT
5+
*/
6+
7+
#include <popl/popl.hpp>

#include <CL/opencl.hpp>

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <sstream>
#include <string>
#include <strstream>
#include <vector>
15+
16+
int main(
17+
int argc,
18+
char** argv )
19+
{
20+
int platformIndex = 0;
21+
int deviceIndex = 0;
22+
23+
size_t gws = 1024 * 1024;
24+
size_t lws = 0;
25+
size_t iterations = 16;
26+
size_t ops = 1024;
27+
uint32_t seed = 0;
28+
29+
std::string datatype;
30+
std::string operation;
31+
std::string buildOptions;
32+
33+
{
34+
popl::OptionParser op("Supported Options");
35+
op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex, &platformIndex);
36+
op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex, &deviceIndex);
37+
op.add<popl::Value<size_t>>("g", "gws", "Global Work Size", gws, &gws);
38+
op.add<popl::Value<size_t>>("l", "lws", "Local Work Size (0 -> NULL)", lws, &lws);
39+
op.add<popl::Value<size_t>>("i", "iterations", "Iterations", iterations, &iterations);
40+
op.add<popl::Value<size_t>>("o", "ops", "Operations to Run Per Kernel", ops, &ops);
41+
op.add<popl::Value<uint32_t>>("s", "seed", "Seed Value For Computation", seed, &seed);
42+
op.add<popl::Value<std::string>>("", "options", "Program Build Options", buildOptions, &buildOptions);
43+
op.add<popl::Value<std::string>>("", "type", "Data Type for Computation", datatype, &datatype);
44+
op.add<popl::Value<std::string>>("", "operation", "Operation to Test", operation, &operation);
45+
46+
bool printUsage = false;
47+
try {
48+
op.parse(argc, argv);
49+
} catch (std::exception& e) {
50+
fprintf(stderr, "Error: %s\n\n", e.what());
51+
printUsage = true;
52+
}
53+
54+
if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) {
55+
fprintf(stderr,
56+
"Usage: copybufferkernel [options]\n"
57+
"%s", op.help().c_str());
58+
fprintf(stderr,
59+
"\n"
60+
"Note: for best results, the operation should assign to z and be a function of z.\n"
61+
" Other symbols that can be used are:\n"
62+
" x: data read from a buffer, unique for each local id\n"
63+
" y: this work-item's local id\n"
64+
" Example: z = z + x\n");
65+
return -1;
66+
}
67+
}
68+
69+
std::vector<cl::Platform> platforms;
70+
cl::Platform::get(&platforms);
71+
72+
printf("Running on platform: %s\n",
73+
platforms[platformIndex].getInfo<CL_PLATFORM_NAME>().c_str() );
74+
75+
std::vector<cl::Device> devices;
76+
platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices);
77+
78+
printf("Running on device: %s\n",
79+
devices[deviceIndex].getInfo<CL_DEVICE_NAME>().c_str() );
80+
81+
cl::Context context{devices[deviceIndex]};
82+
83+
std::stringstream ss;
84+
85+
if (datatype.rfind("half", 0) == 0) {
86+
ss << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable" << std::endl;
87+
}
88+
else if (datatype.rfind("doubld", 0) == 0) {
89+
ss << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" << std::endl;
90+
}
91+
92+
ss << "__kernel void Bench(__global " << datatype << " * buffer)" << std::endl;
93+
ss << "{" << std::endl;
94+
ss << " " << datatype << " x = buffer[get_local_id(0)];" << std::endl;
95+
ss << " " << datatype << " y = (" << datatype << ")get_local_id(0);" << std::endl;
96+
ss << " " << datatype << " z = x;" << std::endl;
97+
for (size_t i = 0; i < ops; i++) {
98+
ss << " " << operation << ";" << std::endl;
99+
}
100+
101+
ss << " buffer[get_local_id(0)] = z;" << std::endl;
102+
ss << "}" << std::endl;
103+
104+
cl::Program program{ context, ss.str() };
105+
106+
printf("Building program with build options: %s\n",
107+
buildOptions.empty() ? "(none)" : buildOptions.c_str());
108+
program.build(buildOptions.c_str());
109+
cl::Kernel kernel{ program, "Bench" };
110+
111+
cl::CommandQueue commandQueue{context, devices[deviceIndex], CL_QUEUE_PROFILING_ENABLE};
112+
113+
std::vector<uint32_t> data(1024 * 16 * 16 * 2, seed);
114+
cl::Buffer buf = cl::Buffer{
115+
context,
116+
CL_MEM_COPY_HOST_PTR,
117+
data.size() * sizeof(data[0]),
118+
data.data() };
119+
120+
kernel.setArg(0, buf);
121+
122+
double minTime = 1e9;
123+
double maxTime = 0;
124+
125+
for (size_t i = 0; i < iterations; i++) {
126+
printf("."); fflush(stdout);
127+
128+
cl::Event event;
129+
commandQueue.enqueueNDRangeKernel(
130+
kernel,
131+
cl::NullRange,
132+
cl::NDRange{gws},
133+
(lws == 0) ? cl::NullRange : cl::NDRange{lws},
134+
nullptr,
135+
&event);
136+
commandQueue.finish();
137+
138+
cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
139+
cl_ulong end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
140+
141+
double time = (end - start) / 1e9;
142+
143+
minTime = std::min(time, minTime);
144+
maxTime = std::max(time, maxTime);
145+
}
146+
147+
double rate = (double)gws * ops / (minTime * 1024 * 1024 * 1024);
148+
149+
std::stringstream fnty;
150+
fnty << operation << " (" << datatype << ")";
151+
152+
printf("\n");
153+
printf("%32s %10s %10s %12s\n", "Function", "Min Time", "Max Time", "Max Ops/s");
154+
printf("%32s %10f %10f %12f\n", fnty.str().c_str(), minTime, maxTime, rate);
155+
156+
return 0;
157+
}

samples/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ add_subdirectory( 06_ndrangekernelfromfile )
7878
add_subdirectory( 10_queueexperiments )
7979
add_subdirectory( 16_floatatomics )
8080

81+
add_subdirectory( 99_aluperf )
82+
8183
set(BUILD_EXTENSION_SAMPLES TRUE)
8284
if(NOT TARGET OpenCLExt)
8385
message(STATUS "Skipping Extension Samples - OpenCL Extension Loader is not found.")

0 commit comments

Comments
 (0)