From 47863faa9ba4f3d3598323166b5653915eacfdeb Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 17 Oct 2024 15:00:08 +0100 Subject: [PATCH 01/73] Add sycl_khr_free_function_commands extension This extension provides an alternative mechanism for submitting commands to a device via free-functions that require developers to opt-in to the creation of event objects. It also proposes alternative names for several commands (e.g., launch) and simplifies some concepts (e.g., by removing the need for the nd_range class). --- adoc/chapters/architecture.adoc | 1 + adoc/extensions/index.adoc | 4 +- .../sycl_khr_free_function_commands.adoc | 928 ++++++++++++++++++ 3 files changed, 931 insertions(+), 2 deletions(-) create mode 100644 adoc/extensions/sycl_khr_free_function_commands.adoc diff --git a/adoc/chapters/architecture.adoc b/adoc/chapters/architecture.adoc index ff3e8d3af..6f66f69b0 100644 --- a/adoc/chapters/architecture.adoc +++ b/adoc/chapters/architecture.adoc @@ -1978,6 +1978,7 @@ always matches the byte order of the devices. This allows data to be copied between the host and the devices without any byte swapping. +[[subsec:example.sycl.application]] == Example SYCL application Below is a more complex example application, combining some of the features diff --git a/adoc/extensions/index.adoc b/adoc/extensions/index.adoc index 07062df61..eb41d48cf 100644 --- a/adoc/extensions/index.adoc +++ b/adoc/extensions/index.adoc @@ -7,7 +7,7 @@ working group. These extensions may be promoted to core features in future versions of the SYCL specification, but their design is subject to change. -(There are currently no extensions in this appendix.) 
- // leveloffset=2 allows extensions to be written as standalone documents // include::sycl_khr_extension_name.adoc[leveloffset=2] + +include::sycl_khr_free_function_commands.adoc[leveloffset=2] diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc new file mode 100644 index 000000000..a404632b0 --- /dev/null +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -0,0 +1,928 @@ += SYCL_KHR_FREE_FUNCTION_COMMANDS + +This extension provides an alternative mechanism for submitting commands to a +device via free-functions that require developers to opt-in to the creation of +[code]#event# objects. + +== Dependencies + +This extension has no dependencies on other extensions. + +== Feature test macro + +An implementation supporting this extension must predefine the macro +[code]#SYCL_KHR_FREE_FUNCTION_COMMANDS# to one of the values defined in the +table below. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + +== Usage example + +The example below rewrites the application from +<<subsec:example.sycl.application>> to demonstrate the usage of this extension. 
+ +[source,role=synopsis] +---- +#include +#include +using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names + +// Size of the matrices +constexpr size_t N = 2000; +constexpr size_t M = 3000; + +int main() { + // Create a queue to work on + queue myQueue; + + // Create some 2D arrays of float for our matrices + float* a = malloc_shared(N * M, myQueue); + float* b = malloc_shared(N * M, myQueue); + float* c = malloc_shared(N * M, myQueue); + + // Launch an asynchronous kernel to initialize a + // Use khr::submit to get a handler, even though not required here + khr::submit(myQueue, [&](handler& cgh) { + + // Enqueue a kernel iterating on a N*M 2D iteration space + khr::launch(cgh, range<2> { N, M }, [=](id<2> index) { + size_t i = index[0]; + size_t j = index[1]; + a[i * M + j] = i * 2 + j; + }); + + }); + + // Launch an asynchronous kernel to initialize b + // Use khr::launch to enqueue a kernel without a handler + khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { + size_t i = index[0]; + size_t j = index[1]; + b[i * M + j] = i * 2014 + j * 42; + }); + + // Ensure that the two previous kernels complete before enqueueing more work + // This does not block the host, but enforces dependencies on the device + khr::command_barrier(myQueue); + + // Launch an asynchronous kernel to compute matrix addition c = a + b + // Use khr::launch_grouped to enqueue a kernel using groups without a handler + range<2> local = { 2, 2 }; + range<2> global = { N, M }; + khr::launch_grouped(myQueue, global, local, [=](khr::invocation<2> ivc) { + size_t i = ivc.id(0); + size_t j = ivc.id(1); + size_t index = i * M + j; + c[index] = a[index] + b[index]; + }); + + // Wait for all three kernels to complete before accessing the results + // This blocks the host until all previous kernels have completed + myQueue.wait(); + + std::cout << std::endl << "Result:" << std::endl; + for (size_t i = 0; i < N; i++) { + for (size_t j = 0; j < M; j++) { + size_t index = i * 
M + j; + // Compare the result to the analytic value + if (c[index] != i * (2 + 2014) + j * (1 + 42)) { + std::cout << "Wrong value " << c[index] << " on element " << i << " " + << j << std::endl; + exit(-1); + } + } + } + + std::cout << "Good computation!" << std::endl; + return 0; +} +---- + +== New free functions + +=== Command-groups + +.[apititle]#submit# +[source,role=synopsis,id=api:submit] +---- +namespace sycl::khr { + +template +void submit(sycl::queue q, CommandGroupFunc&& cgf); + +} +---- +_Effects_: Equivalent to [code]#q.submit(cgf)#. + +''' + +.[apititle]#submit_tracked# +[source,role=synopsis,id=api:submit_tracked] +---- +namespace sycl::khr { + +template +sycl::event submit_tracked(sycl::queue q, CommandGroupFunc&& cgf); + +} +---- +_Effects_: Equivalent to [code]#q.submit(cgf)#. + +_Returns_: A [code]#sycl::event# associated with the submitted command. + +''' + +=== Kernel launch + +.[apititle]#launch# (kernel function) +[source,role=synopsis,id=api:launch] +---- +namespace sycl::khr { + +template +void launch(sycl::handler& h, sycl::range<1> r, const KernelType& k); (1) + +template +void launch(sycl::handler& h, sycl::range<2> r, const KernelType& k); (2) + +template +void launch(sycl::handler& h, sycl::range<3> r, const KernelType& k); (3) + +template +void launch(sycl::queue q, sycl::range<1> r, const KernelType& k); (4) + +template +void launch(sycl::queue q, sycl::range<2> r, const KernelType& k); (5) + +template +void launch(sycl::queue q, sycl::range<3> r, const KernelType& k); (6) + +} +---- +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(r, k)#. + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { launch(h, r, +k); })#. + +''' + +.[apititle]#launch# (kernel object) +[source,role=synopsis,id=api:launch-kernel] +---- +namespace sycl::khr { + +template +void launch(sycl::handler& h, sycl::range<1> r, + const sycl::kernel& k, Args&&... 
args); (1) + +template +void launch(sycl::handler& h, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (2) + +template +void launch(sycl::handler& h, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (3) + +template +void launch(sycl::queue q, sycl::range<1> r, + const sycl::kernel& k, Args&&... args); (4) + +template +void launch(sycl::queue q, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (5) + +template +void launch(sycl::queue q, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_reduce# (kernel function) +[source,role=synopsis,id=api:launch_reduce] +---- +namespace sycl::khr { + +template +void launch_reduce(sycl::handler& h, sycl::range<1> r, + const KernelType& k, Reductions&&... reductions); (1) + +template +void launch_reduce(sycl::handler& h, sycl::range<2> r, + const KernelType& k, Reductions&&... reductions); (2) + +template +void launch_reduce(sycl::handler& h, sycl::range<3> r, + const KernelType& k, Reductions&&... reductions); (3) + +template +void launch_reduce(sycl::queue q, sycl::range<1> r, + const KernelType& k, Reductions&&... reductions); (4) + +template +void launch_reduce(sycl::queue q, sycl::range<2> r, + const KernelType& k, Reductions&&... reductions); (5) + +template +void launch_reduce(sycl::queue q, sycl::range<3> r, + const KernelType& k, Reductions&&... reductions); (6) + +} +---- +_Constraints_: The parameter pack consists of 0 or more objects created by the +[code]#sycl::reduction# function. + +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(r, reductions..., k)#. + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_reduce(h, r, k, reductions...); })#. 
+ + +''' + +.[apititle]#launch_reduce# (kernel object) +[source,role=synopsis,id=api:launch_reduce-kernel] +---- +namespace sycl::khr { + +template +void launch_reduce(sycl::handler& h, sycl::range<1> r, + const sycl::kernel& k, Args&&... args); (1) + +template +void launch_reduce(sycl::handler& h, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (2) + +template +void launch_reduce(sycl::handler& h, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (3) + +template +void launch_reduce(sycl::queue q, sycl::range<1> r, + const sycl::kernel& k, Args&&... args); (4) + +template +void launch_reduce(sycl::queue q, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (5) + +template +void launch_reduce(sycl::queue q, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#, where each work-item contributes to one or more +reductions. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_grouped# (kernel function) +[source,role=synopsis,id=api:launch_grouped] +---- +namespace sycl::khr { + +template +void launch_grouped(sycl::handler& h, sycl::range<1> r, sycl::range<1> size, + const KernelType& k); (1) + +template +void launch_grouped(sycl::handler& h, sycl::range<2> r, sycl::range<2> size, + const KernelType& k); (2) + +template +void launch_grouped(sycl::handler& h, sycl::range<3> r, sycl::range<3> size, + const KernelType& k); (3) + +template +void launch_grouped(sycl::queue q, sycl::range<1> r, sycl::range<1> size, + const KernelType& k); (4) + +template +void launch_grouped(sycl::queue q, sycl::range<2> r, sycl::range<2> size, + const KernelType& k); (5) + +template +void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, + const KernelType& k); (6) + +} +---- +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(nd_range(r, size), k)#. 
+ +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped(h, r, size, k); })#. + +''' + +.[apititle]#launch_grouped# (kernel object) +[source,role=synopsis,id=api:launch_grouped-kernel] +---- +namespace sycl::khr { + +template +void launch_grouped(sycl::handler& h, sycl::range<1> r, sycl::range<1> size, + const sycl::kernel& k, Args&&... args); (1) + +template +void launch_grouped(sycl::handler& h, sycl::range<2> r, sycl::range<2> size, + const sycl::kernel& k, Args&&... args); (2) + +template +void launch_grouped(sycl::handler& h, sycl::range<3> r, sycl::range<3> size, + const sycl::kernel& k, Args&&... args); (3) + +template +void launch_grouped(sycl::queue q, sycl::range<1> r, sycl::range<1> size, + const sycl::kernel& k, Args&&... args); (4) + +template +void launch_grouped(sycl::queue q, sycl::range<2> r, sycl::range<2> size, + const sycl::kernel& k, Args&&... args); (5) + +template +void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, + const sycl::kernel& k, Args&&... args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#. +Work-items are grouped into work-groups of size [code]#size#. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_grouped_reduce# (kernel function) +[source,role=synopsis,id=api:launch_grouped_reduce] +---- +namespace sycl::khr { + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<1> r, + sycl::range<1> size, const KernelType& k, + Reductions&&... reductions); (1) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<2> r, + sycl::range<2> size, const KernelType& k, + Reductions&&... reductions); (2) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<3> r, + sycl::range<3> size, const KernelType& k, + Reductions&&... 
reductions); (3) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, + sycl::range<1> size, const KernelType& k, + Reductions&&... reductions); (4) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, + sycl::range<2> size, const KernelType& k, + Reductions&&... reductions); (5) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, + sycl::range<3> size, const KernelType& k, + Reductions&&... reductions); (6) + +} +---- +_Constraints_: The parameter pack consists of 0 or more objects created by the +[code]#sycl::reduction# function. + +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(nd_range(r, size), +reductions..., k)#. + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, reductions...); })#. + +''' + +.[apititle]#launch_grouped_reduce# (kernel object) +[source,role=synopsis,id=api:launch_grouped_reduce-kernel] +---- +namespace sycl::khr { + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<1> r, + sycl::range<1> size, const sycl::kernel& k, + Args&&... args); (1) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<2> r, + sycl::range<2> size, const sycl::kernel& k, + Args&&... args); (2) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<3> r, + sycl::range<3> size, const sycl::kernel& k, + Args&&... args); (3) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, + sycl::range<1> size, const sycl::kernel& k, + Args&&... args); (4) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, + sycl::range<2> size, const sycl::kernel& k, + Args&&... args); (5) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, + sycl::range<3> size, const sycl::kernel& k, + Args&&... 
args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#, where each work-item contributes to one or more +reductions. +Work-items are grouped into work-groups of size [code]#size#. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_task# (kernel function) +[source,role=synopsis,id=api:launch_task] +---- +namespace sycl::khr { + +template +void launch_task(sycl::handler& h, const KernelType& k); (1) + +template +void launch_task(sycl::queue q, const KernelType& k); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.single_task(k)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { launch_task(h, +k); })#. + +''' + +.[apititle]#launch_task# (kernel object) +[source,role=synopsis,id=api:launch_task-kernel] +---- +namespace sycl::khr { + +template +void launch_task(sycl::queue q, const sycl::kernel& k, Args&&... args); (1) + +template +void launch_task(sycl::handler& h, const sycl::kernel& k, Args&&... args); (2) + +} +---- +_Effects_: Enqueues a kernel object as a device task. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +=== Memory operations + +.[apititle]#memcpy# +[source,role=synopsis,id=api:memcpy] +---- +namespace sycl::khr { + +void memcpy(sycl::handler& h, void* dest, const void* src, size_t numBytes); (1) + +void memcpy(sycl::queue q, void* dest, const void* src, size_t numBytes); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.memcpy(dest, src, numBytes)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memcpy(h, dest, +src, numBytes); })#. 
+ +''' + +.[apititle]#copy# (USM pointers) +[source,role=synopsis,id=api:copy-pointer] +---- +namespace sycl::khr { + +template +void copy(sycl::handler& h, const T* src, T* dest, size_t count); (1) + +template +void copy(sycl::queue q, const T* src, T* dest, size_t count); (2) + +} +---- + +Copies between two USM pointers. + +_Constraints_: [code]#T# must be <>. + +_Preconditions_: [code]#src# and [code]#dest# must be host pointers or USM +pointers accessible on the device. +[code]#src# and [code]#dest# must point to allocations of at least [code]#count# +elements of type [code]#T#. + +_Effects (1)_: Equivalent to [code]#h.copy(src, dest, count)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest, count); })#. + +''' + +.[apititle]#copy# (accessors, host to device) +[source,role=synopsis,id=api:copy-accessor-h2d] +---- +namespace sycl::khr { + +template +void copy(sycl::handler& h, + const SrcT* src, + sycl::accessor dest); (3) + +template +void copy(sycl::handler& h, + std::shared_ptr src, + sycl::accessor dest); (4) + +template +void copy(sycl::queue q, + const SrcT* src, + sycl::accessor dest); (5) + +template +void copy(sycl::queue q, + std::shared_ptr src, + sycl::accessor dest); (6) + +} +---- + +Copies from host to device. + +_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. +[code]#DestMode# must be [code]#access_mode::write# or +[code]#access_mode::read_write#. + +_Preconditions_: [code]#src# must be a host pointer, pointing to an allocation +of at least as many bytes as the range represented by [code]#dest#. + +_Effects (3-4)_: Equivalent to [code]#h.copy(src, dest)#. 
+ +_Effects (5-6)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest); })#. + +''' + +.[apititle]#copy# (accessors, device to host) +[source,role=synopsis,id=api:copy-accessor-d2h] +---- +namespace sycl::khr { + +template +void copy(sycl::handler& h, + sycl::accessor src, + DestT* dest); (7) + +template +void copy(sycl::handler& h, + sycl::accessor src, + std::shared_ptr dest); (8) + +template +void copy(sycl::queue q, + sycl::accessor src, + DestT* dest); (9) + +template +void copy(sycl::queue q, + sycl::accessor src, + std::shared_ptr dest); (10) + +} +---- + +Copies from device to host. + +_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. +[code]#DestMode# must be [code]#access_mode::read# or +[code]#access_mode::read_write#. + +_Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation +of at least as many bytes as the range represented by [code]#src#. + +_Effects (7-8)_: Equivalent to [code]#h.copy(src, dest)#. + +_Effects (9-10)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest); })#. + +''' + +.[apititle]#copy# (accessors, device to device) +[source,role=synopsis,id=api:copy-accessor-d2d] +---- +namespace sycl::khr { + +template +void copy(sycl::handler& h, + sycl::accessor src, + sycl::accessor dest); (11) + +template +void copy(sycl::queue q, + sycl::accessor src, + sycl::accessor dest); (12) + +} +---- + +Copies between two device accessors. + +_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. +[code]#SrcMode# must be [code]#access_mode::read# or +[code]#access_mode::read_write#. +[code]#DestMode# must be [code]#access_mode::write# or +[code]#access_mode::read_write#. + +_Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. + +_Effects (12)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest); })#. + +_Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error +code if [code]#dest.get_count() < src.get_count()#. 
+ +''' + +.[apititle]#memset# +[source,role=synopsis,id=api:memset] +---- +namespace sycl::khr { + +void memset(sycl::handler& h, void* ptr, int value, size_t numBytes); (1) + +void memset(sycl::queue q, void* ptr, int value, size_t numBytes); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.memset(ptr, value, numBytes)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memset(h, ptr, +value, numBytes); })#. + +''' + +.[apititle]#fill# +[source,role=synopsis,id=api:fill] +---- +namespace sycl::khr { + +template +void fill(sycl::handler& h, T* ptr, const T& pattern, size_t count); (1) + +template +void fill(sycl::handler& h, + sycl::accessor dest, + const T& src); (2) + +template +void fill(sycl::queue q, T* ptr, const T& pattern, size_t count); (3) + +template +void fill(sycl::queue q, + sycl::accessor dest, + const T& src); (4) + +} +---- +_Effects (1)_: Equivalent to [code]#h.fill(ptr, pattern, count)#. + +_Effects (2)_: Equivalent to [code]#h.fill(dest, src)#. + +_Effects (3)_: Equivalent to [code]#q.submit([&](handler& h) { fill(h, ptr, +pattern, count); })#. + +_Effects (4)_: Equivalent to [code]#q.submit([&](handler& h) { fill(h, dest, +src); })#. + +''' + +.[apititle]#update_host# +[source,role=synopsis,id=api:update_host] +---- +namespace sycl::khr { + +template +void update_host(sycl::handler& h, accessor acc); (1) + +template +void update_host(sycl::queue q, accessor acc); (2) + +} +---- +_Constraints_: [code]#T# must be <>. + +_Effects (1)_: Equivalent to [code]#h.update_host(acc)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { update_host(h, +acc); })#. + +''' + +.[apititle]#prefetch# +[source,role=synopsis,id=api:prefetch] +---- +namespace sycl::khr { + +void prefetch(sycl::handler& h, void* ptr, size_t numBytes); (1) + +void prefetch(sycl::queue q, void* ptr, size_t numBytes); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.prefetch(ptr, numBytes)#. 
+ +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { prefetch(h, ptr, +numBytes); })#. + +''' + +.[apititle]#mem_advise# +[source,role=synopsis,id=api:mem_advise] +---- +namespace sycl::khr { + +void mem_advise(sycl::handler& h, void* ptr, size_t numBytes, int advice); (1) + +void mem_advise(sycl::queue q, void* ptr, size_t numBytes, int advice); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.mem_advise(ptr, numBytes, advice)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { mem_advise(h, +ptr, numBytes, advice); })#. + +''' + +=== Command and event barriers + +.[apititle]#command_barrier# +[source,role=synopsis,id=api:command_barrier] +---- +namespace sycl::khr { + +void command_barrier(sycl::handler& h); (1) + +void command_barrier(sycl::queue q); (2) + +} +---- +_Effects_: Enqueues a command barrier. +Any commands submitted after this barrier cannot begin execution until all +previously submitted commands (and any commands associated with dependent +events) have completed. + +''' + +.[apititle]#event_barrier# +[source,role=synopsis,id=api:event_barrier] +---- +namespace sycl::khr { + +void event_barrier(sycl::handler& h, const std::vector& events); (1) + +void event_barrier(sycl::queue q, const std::vector& events); (2) + +} +---- +_Effects_: Enqueues an event barrier. +Any commands submitted after this barrier cannot begin execution until all +commands associated with [code]#events# (and any commands associated with other +dependent events) have completed. + +{note}For both overloads, if [code]#events# is empty and an event barrier has no +other dependencies (e.g., specified by [code]#handler::depends_on#), it is not +required to wait for any commands unless the [code]#queue# is in-order.{endnote} + +''' + +== [code]#invocation# class template + +The [code]#invocation# class template identifies an invocation of a kernel +function. 
+ +Instances of the [code]#invocation# class template are not user-constructible +and are passed as an argument to each invocation of a kernel function. + +[source,role=synopsis] +---- +namespace sycl::khr { + +template +class invocation +{ + public: + static constexpr int dimensions = Dimensions; + + id id() const noexcept; + size_t linear_id() const noexcept; + + range range() const noexcept; + + group get_work_group() const noexcept; + + sub_group get_sub_group() const noexcept; + + // Available for backwards compatibility only + operator nd_item() const noexcept; +}; + +} +---- + +.[apidef]#id# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-id] +---- +id id() const noexcept; +---- +_Returns_: The index of this invocation within the kernel dispatch. + +''' + +.[apidef]#linear_id# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-linear_id] +---- +size_t linear_id() const noexcept; +---- +_Returns_: The linearized index (see <>) of this +invocation within the kernel dispatch. + +''' + +.[apidef]#range# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-range] +---- +range range() const noexcept; +---- + +_Returns_: An index space representing all invocations of this kernel. + +''' + +.[apidef]#get_work_group# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_work_group] +---- +group get_work_group() const noexcept; +---- + +_Returns_: A [code]#group# representing the <> to which this +invocation belongs. + +''' + +.[apidef]#get_sub_group# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_sub_group] +---- +sub_group get_sub_group() const noexcept; +---- + +_Returns_: A [code]#sub_group# representing the sub-group to which this +invocation belongs. 
+ +''' + +.[apidef]#nd_item conversion operator# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-nd_item-conversion-operator] +---- +operator nd_item() const noexcept; +---- + +_Returns_: An [code]#nd_item# representing this invocation. + +{note}This function exists only to provide backwards compatibility with SYCL +2020 code in order to facilitate experimentation with the new interface proposed +by this extension.{endnote} + +== Issues + +None. From ce08652764e915d0d5bb16628869a7e2f5efd657 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 5 Dec 2024 09:55:50 +0000 Subject: [PATCH 02/73] Reword khr_free_function_commands comment --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index a404632b0..d72646685 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -48,7 +48,7 @@ int main() { float* c = malloc_shared(N * M, myQueue); // Launch an asynchronous kernel to initialize a - // Use khr::submit to get a handler, even though not required here + // Use khr::submit to enqueue a kernel via a handler khr::submit(myQueue, [&](handler& cgh) { // Enqueue a kernel iterating on a N*M 2D iteration space From 372bb3b2d8462248c96972b5ea69396c776f006a Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 5 Dec 2024 09:58:23 +0000 Subject: [PATCH 03/73] Add periods to khr_free_function_commands comments --- .../sycl_khr_free_function_commands.adoc | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index d72646685..ed024758e 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -32,26 
+32,26 @@ The example below rewrites the application from ---- #include #include -using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names +using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names. -// Size of the matrices +// Size of the matrices. constexpr size_t N = 2000; constexpr size_t M = 3000; int main() { - // Create a queue to work on + // Create a queue to work on. queue myQueue; - // Create some 2D arrays of float for our matrices + // Create some 2D arrays of float for our matrices. float* a = malloc_shared(N * M, myQueue); float* b = malloc_shared(N * M, myQueue); float* c = malloc_shared(N * M, myQueue); - // Launch an asynchronous kernel to initialize a - // Use khr::submit to enqueue a kernel via a handler + // Launch an asynchronous kernel to initialize a. + // Use khr::submit to enqueue a kernel via a handler. khr::submit(myQueue, [&](handler& cgh) { - // Enqueue a kernel iterating on a N*M 2D iteration space + // Enqueue a kernel iterating on a N*M 2D iteration space. khr::launch(cgh, range<2> { N, M }, [=](id<2> index) { size_t i = index[0]; size_t j = index[1]; @@ -60,20 +60,20 @@ int main() { }); - // Launch an asynchronous kernel to initialize b - // Use khr::launch to enqueue a kernel without a handler + // Launch an asynchronous kernel to initialize b. + // Use khr::launch to enqueue a kernel without a handler. khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { size_t i = index[0]; size_t j = index[1]; b[i * M + j] = i * 2014 + j * 42; }); - // Ensure that the two previous kernels complete before enqueueing more work - // This does not block the host, but enforces dependencies on the device + // Ensure that the two previous kernels complete before enqueueing more work. + // This does not block the host, but enforces dependencies on the device. 
khr::command_barrier(myQueue); - // Launch an asynchronous kernel to compute matrix addition c = a + b - // Use khr::launch_grouped to enqueue a kernel using groups without a handler + // Launch an asynchronous kernel to compute matrix addition c = a + b. + // Use khr::launch_grouped to enqueue a kernel using groups without a handler. range<2> local = { 2, 2 }; range<2> global = { N, M }; khr::launch_grouped(myQueue, global, local, [=](khr::invocation<2> ivc) { @@ -83,15 +83,15 @@ int main() { c[index] = a[index] + b[index]; }); - // Wait for all three kernels to complete before accessing the results - // This blocks the host until all previous kernels have completed + // Wait for all three kernels to complete before accessing the results. + // This blocks the host until all previous kernels have completed. myQueue.wait(); std::cout << std::endl << "Result:" << std::endl; for (size_t i = 0; i < N; i++) { for (size_t j = 0; j < M; j++) { size_t index = i * M + j; - // Compare the result to the analytic value + // Compare the result to the analytic value. if (c[index] != i * (2 + 2014) + j * (1 + 42)) { std::cout << "Wrong value " << c[index] << " on element " << i << " " << j << std::endl; @@ -853,7 +853,7 @@ class invocation sub_group get_sub_group() const noexcept; - // Available for backwards compatibility only + // Available for backwards compatibility only. operator nd_item() const noexcept; }; From 9747f7a4ad3deecda7a422be4ba89730c458da59 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:15:53 +0000 Subject: [PATCH 04/73] Add + marks to code blocks containing ... 
--- .../sycl_khr_free_function_commands.adoc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ed024758e..6c30d3a74 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -244,10 +244,10 @@ void launch_reduce(sycl::queue q, sycl::range<3> r, _Constraints_: The parameter pack consists of 0 or more objects created by the [code]#sycl::reduction# function. -_Effects (1-3)_: Equivalent to [code]#h.parallel_for(r, reductions..., k)#. +_Effects (1-3)_: Equivalent to [code]#+h.parallel_for(r, reductions..., k)+#. -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_reduce(h, r, k, reductions...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_reduce(h, r, k, reductions...); })+#. ''' @@ -406,11 +406,11 @@ void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, _Constraints_: The parameter pack consists of 0 or more objects created by the [code]#sycl::reduction# function. -_Effects (1-3)_: Equivalent to [code]#h.parallel_for(nd_range(r, size), -reductions..., k)#. +_Effects (1-3)_: Equivalent to [code]#+h.parallel_for(nd_range(r, size), +reductions..., k)+#. -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_grouped_reduce(h, r, size, k, reductions...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, reductions...); })+#. 
''' From 2527e90051562e2b28d4c446d35674c2cfbf83b8 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:17:03 +0000 Subject: [PATCH 05/73] Require at least 1 reduction in *_reduce functions Previous "0 or more" wording only made sense when reductions could be optionally provided to functions like parallel_for; now that there are dedicated *_reduce functions, at least one reduction is required. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 6c30d3a74..6e6e1fec1 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -241,7 +241,7 @@ void launch_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Constraints_: The parameter pack consists of 0 or more objects created by the +_Constraints_: The parameter pack consists of 1 or more objects created by the [code]#sycl::reduction# function. _Effects (1-3)_: Equivalent to [code]#+h.parallel_for(r, reductions..., k)+#. @@ -403,7 +403,7 @@ void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Constraints_: The parameter pack consists of 0 or more objects created by the +_Constraints_: The parameter pack consists of 1 or more objects created by the [code]#sycl::reduction# function. _Effects (1-3)_: Equivalent to [code]#+h.parallel_for(nd_range(r, size), From f63adeb07c1f10d43ce604a82349b1da8437e72b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:20:13 +0000 Subject: [PATCH 06/73] Replace "must be" with "is" in constraints "is" is more consistent with ISO C++ wording. 
--- .../sycl_khr_free_function_commands.adoc | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 6e6e1fec1..ef276e0d2 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -533,7 +533,7 @@ void copy(sycl::queue q, const T* src, T* dest, size_t count); (2) Copies between two USM pointers. -_Constraints_: [code]#T# must be <>. +_Constraints_: [code]#T# is <>. _Preconditions_: [code]#src# and [code]#dest# must be host pointers or USM pointers accessible on the device. @@ -577,8 +577,8 @@ void copy(sycl::queue q, Copies from host to device. -_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. -[code]#DestMode# must be [code]#access_mode::write# or +_Constraints_: [code]#SrcT# and [code]#DestT# is <>. +[code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. _Preconditions_: [code]#src# must be a host pointer, pointing to an allocation @@ -621,8 +621,8 @@ void copy(sycl::queue q, Copies from device to host. -_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. -[code]#DestMode# must be [code]#access_mode::read# or +_Constraints_: [code]#SrcT# and [code]#DestT# is <>. +[code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. _Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation @@ -657,10 +657,9 @@ void copy(sycl::queue q, Copies between two device accessors. -_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. -[code]#SrcMode# must be [code]#access_mode::read# or -[code]#access_mode::read_write#. -[code]#DestMode# must be [code]#access_mode::write# or +_Constraints_: [code]#SrcT# and [code]#DestT# is <>. +[code]#SrcMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. 
+[code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. _Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. @@ -739,7 +738,7 @@ void update_host(sycl::queue q, accessor acc); } ---- -_Constraints_: [code]#T# must be <>. +_Constraints_: [code]#T# is <>. _Effects (1)_: Equivalent to [code]#h.update_host(acc)#. From 47c08f8623d9be26f7116fd9da71fa42ba71aa02 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:27:17 +0000 Subject: [PATCH 07/73] Use bulleted list for multiple constraints --- .../sycl_khr_free_function_commands.adoc | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ef276e0d2..4e60163be 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -577,9 +577,11 @@ void copy(sycl::queue q, Copies from host to device. -_Constraints_: [code]#SrcT# and [code]#DestT# is <>. -[code]#DestMode# is [code]#access_mode::write# or -[code]#access_mode::read_write#. +_Constraints_: + +* [code]#SrcT# and [code]#DestT# is <>. +* [code]#DestMode# is [code]#access_mode::write# or + [code]#access_mode::read_write#. _Preconditions_: [code]#src# must be a host pointer, pointing to an allocation of at least as many bytes as the range represented by [code]#dest#. @@ -621,9 +623,11 @@ void copy(sycl::queue q, Copies from device to host. -_Constraints_: [code]#SrcT# and [code]#DestT# is <>. -[code]#DestMode# is [code]#access_mode::read# or -[code]#access_mode::read_write#. +_Constraints_: + +* [code]#SrcT# and [code]#DestT# is <>. +* [code]#DestMode# is [code]#access_mode::read# or + [code]#access_mode::read_write#. _Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation of at least as many bytes as the range represented by [code]#src#. 
@@ -657,10 +661,13 @@ void copy(sycl::queue q, Copies between two device accessors. -_Constraints_: [code]#SrcT# and [code]#DestT# is <>. -[code]#SrcMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. -[code]#DestMode# is [code]#access_mode::write# or -[code]#access_mode::read_write#. +_Constraints_: + +* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcMode# is [code]#access_mode::read# or + [code]#access_mode::read_write#. +* [code]#DestMode# is [code]#access_mode::write# or + [code]#access_mode::read_write#. _Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. From 15fd80ab652cb66e6239ef8a16ee15154aac1fdc Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:52:00 +0000 Subject: [PATCH 08/73] Rewrite preconditions for USM copy functions --- .../sycl_khr_free_function_commands.adoc | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 4e60163be..8f8e60b91 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -535,10 +535,16 @@ Copies between two USM pointers. _Constraints_: [code]#T# is <>. -_Preconditions_: [code]#src# and [code]#dest# must be host pointers or USM -pointers accessible on the device. -[code]#src# and [code]#dest# must point to allocations of at least [code]#count# -elements of type [code]#T#. +_Preconditions_: + +* [code]#src# is a host pointer or a pointer within a USM allocation that is + accessible on the device. +* [code]#dest# is a host pointer or a pointer within a USM allocation that is + accessible on the device. +* [code]#src# and [code]#dest# both point to allocations of at least + [code]#count# elements of type [code]#T#. +* If either [code]#src# or [code]#dest# is a pointer is to a USM allocation, + that allocation was created from the same context as the handler's queue. 
_Effects (1)_: Equivalent to [code]#h.copy(src, dest, count)#. @@ -583,8 +589,11 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. -_Preconditions_: [code]#src# must be a host pointer, pointing to an allocation -of at least as many bytes as the range represented by [code]#dest#. +_Preconditions_: + +* [code]#src# is a host pointer. +* [code]#src# points to an allocation of at least as many bytes as the range + represented by [code]#dest#. _Effects (3-4)_: Equivalent to [code]#h.copy(src, dest)#. @@ -629,8 +638,11 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. -_Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation -of at least as many bytes as the range represented by [code]#src#. +_Preconditions_: + +* [code]#dest# is a host pointer. +* [code]#dest# points to an allocation of at least as many bytes as the range + represented by [code]#src#. _Effects (7-8)_: Equivalent to [code]#h.copy(src, dest)#. From 9c627921aa959a199a29ce246491c5b995fdef58 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:54:13 +0000 Subject: [PATCH 09/73] Fix typo in non-normative note --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 8f8e60b91..0efb0ee2b 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -839,7 +839,7 @@ commands associated with [code]#events# (and any commands associated with other dependent events) have completed. 
{note}For both overloads, if [code]#events# is empty and an event barrier has no -other dependencies (e.g., specified by [code]#handler::depends_on#), it is ot +other dependencies (e.g., specified by [code]#handler::depends_on#), it is not required to wait for any commands unless the [code]#queue# is in-order.{endnote} ''' From 437754958e8597148fe1479d7a73691197aa947e Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:06:23 +0000 Subject: [PATCH 10/73] Define kernel object overloads via equivalence --- .../sycl_khr_free_function_commands.adoc | 68 +++++++++++++------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 0efb0ee2b..4406e1878 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -204,9 +204,16 @@ void launch(sycl::queue q, sycl::range<3> r, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: + +[source,c++] +---- +h.set_args(args...); +h.parallel_for(r, k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { launch(h, r, k, +args...); })#. ''' @@ -283,10 +290,15 @@ void launch_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#, where each work-item contributes to one or more -reductions. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(r, k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_reduce(h, r, k, args...); })#. 
''' @@ -359,10 +371,15 @@ void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#. -Work-items are grouped into work-groups of size [code]#size#. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(nd_range(r, size), k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped(h, r, size, k, args...); })#. ''' @@ -451,11 +468,15 @@ void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#, where each work-item contributes to one or more -reductions. -Work-items are grouped into work-groups of size [code]#size#. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(nd_range(r, size), k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, args...); })#. ''' @@ -485,15 +506,22 @@ k); })#. namespace sycl::khr { template -void launch_task(sycl::queue q, const sycl::kernel& k, Args&&... args); (1) +void launch_task(sycl::handler& h, const sycl::kernel& k, Args&&... args); (1) template -void launch_task(sycl::handler& h, const sycl::kernel& k, Args&&... args); (2) +void launch_task(sycl::queue q, const sycl::kernel& k, Args&&... args); (2) } ---- -_Effects_: Enqueues a kernel object as a device task. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(k); +---- + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { launch_task(h, k, +args...); })#. 
''' From c24fb1303aae2d9310e6657db4b5fb955a52474c Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:16:47 +0000 Subject: [PATCH 11/73] Clarify dependencies for command_/event_barrier --- .../sycl_khr_free_function_commands.adoc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 4406e1878..2035280d0 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -844,9 +844,11 @@ void command_barrier(sycl::queue q); (2) } ---- _Effects_: Enqueues a command barrier. -Any commands submitted after this barrier cannot begin execution until all -previously submitted commands (and any commands associated with dependendent -events) have completed. +Any commands submitted after this barrier cannot begin execution until: + +* All commands previously submitted to this queue have completed; and +* All commands associated with this command's dependencies (e.g., via + `handler::depends_on`) have completed. ''' @@ -862,9 +864,11 @@ void event_barrier(sycl::queue q, const std::vector& events); (2 } ---- _Effects_: Enqueues an event barrier. -Any commands submitted after this barrier cannot begin execution until all -commands associated with [code]#events# (and any commands associated with other -dependent events) have completed. +Any commands submitted after this barrier cannot begin execution until: + +* All commands associated with [code]#events# have completed; and +* All commands associated with this command's dependencies (e.g., via + `handler::depends_on`) have completed. 
{note}For both overloads, if [code]#events# is empty and an event barrier has no other dependencies (e.g., specified by [code]#handler::depends_on#), it is not From 664a9129b3130c4cc24ea7d833bed5957eeea615 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:20:23 +0000 Subject: [PATCH 12/73] Clarify that event_barrier can be a no-op --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 2035280d0..88219cb9a 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -871,8 +871,8 @@ Any commands submitted after this barrier cannot begin execution until: `handler::depends_on`) have completed. {note}For both overloads, if [code]#events# is empty and an event barrier has no -other dependencies (e.g., specified by [code]#handler::depends_on#), it is not -required to wait for any commands unless the [code]#queue# is in-order.{endnote} +other dependencies (e.g., specified by [code]#handler::depends_on#), then this +operation is a no-op.{endnote} ''' From 16111b2d8a9e5ccd685d61a905dce9f158d094e8 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:40:39 +0000 Subject: [PATCH 13/73] Add missing invocation constructor --- .../sycl_khr_free_function_commands.adoc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 88219cb9a..f577cd246 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -904,6 +904,7 @@ class invocation sub_group get_sub_group() const noexcept; // Available for backwards compatibility only. 
+ invocation(nd_item it) noexcept; operator nd_item() const noexcept; }; @@ -961,6 +962,21 @@ invocation belongs. ''' +.[apidef]#invocation constructor# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-invocation-constructor] +---- +invocation(nd_item() it) noexcept; +---- + +_Effects_: Constructs a [code]#invocation# representing the same work-item as +[code]#it#. + +{note}This function exists only to provide backwards compatibility with SYCL +2020 code in order to facilitate experimentation with the new interface proposed +by this extension.{endnote} + +''' + .[apidef]#nd_item conversion operator# [source,role=synopsis,id=api:khr-free-function-commands-invocation-nd_item-conversion-operator] ---- From 88b540aff2c79f9c2fba8b2cf0bfa917b39434dc Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 13:33:22 +0000 Subject: [PATCH 14/73] Restart numbering at 1 in each synopsis block --- .../sycl_khr_free_function_commands.adoc | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index f577cd246..0893d0dfe 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -589,22 +589,22 @@ namespace sycl::khr { template void copy(sycl::handler& h, const SrcT* src, - sycl::accessor dest); (3) + sycl::accessor dest); (1) template void copy(sycl::handler& h, std::shared_ptr src, - sycl::accessor dest); (4) + sycl::accessor dest); (2) template void copy(sycl::queue q, const SrcT* src, - sycl::accessor dest); (5) + sycl::accessor dest); (3) template void copy(sycl::queue q, std::shared_ptr src, - sycl::accessor dest); (6) + sycl::accessor dest); (4) } ---- @@ -623,9 +623,9 @@ _Preconditions_: * [code]#src# points to an allocation of at least as many bytes as the range represented by [code]#dest#. 
-_Effects (3-4)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (5-6)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, dest) })# ''' @@ -638,22 +638,22 @@ namespace sycl::khr { template void copy(sycl::handler& h, sycl::accessor src, - DestT* dest); (7) + DestT* dest); (1) template void copy(sycl::handler& h, sycl::accessor src, - std::shared_ptr dest); (8) + std::shared_ptr dest); (2) template void copy(sycl::queue q, sycl::accessor src, - DestT* dest); (9) + DestT* dest); (3) template void copy(sycl::queue q, sycl::accessor src, - std::shared_ptr dest); (10) + std::shared_ptr dest); (4) } ---- @@ -672,9 +672,9 @@ _Preconditions_: * [code]#dest# points to an allocation of at least as many bytes as the range represented by [code]#src#. -_Effects (7-8)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (9-10)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, dest); })#. ''' @@ -688,13 +688,13 @@ template void copy(sycl::queue q, sycl::accessor src, - sycl::accessor dest); (11) + sycl::accessor dest); (1) template void copy(sycl::queue q, sycl::accessor src, - sycl::accessor dest); (12) + sycl::accessor dest); (1) } ---- @@ -709,9 +709,9 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. -_Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects (1)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (12)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, dest); })#. 
_Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error From 2575901220de997cdc88635a60253c4b6f95b143 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 14:22:17 +0000 Subject: [PATCH 15/73] Replace backticks with [code] environment --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 0893d0dfe..693b7dbc2 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -848,7 +848,7 @@ Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and * All commands associated with this command's dependencies (e.g., via - `handler::depends_on`) have completed. + [code]#handler::depends_on#) have completed. ''' @@ -868,7 +868,7 @@ Any commands submitted after this barrier cannot begin execution until: * All commands associated with [code]#events# have completed; and * All commands associated with this command's dependencies (e.g., via - `handler::depends_on`) have completed. + [code]#handler::depends_on#) have completed. {note}For both overloads, if [code]#events# is empty and an event barrier has no other dependencies (e.g., specified by [code]#handler::depends_on#), then this From 8fa1ce202f7ae9a4790ed27f386c8299f9656ab3 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 15:47:03 +0000 Subject: [PATCH 16/73] Add + marks to code blocks containing ... 
again --- .../sycl_khr_free_function_commands.adoc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 693b7dbc2..36dbb91f6 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -212,8 +212,8 @@ h.set_args(args...); h.parallel_for(r, k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { launch(h, r, k, -args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { launch(h, r, +k, args...); })+#. ''' @@ -297,8 +297,8 @@ h.set_args(args...); h.parallel_for(r, k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_reduce(h, r, k, args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_reduce(h, r, k, args...); })+#. ''' @@ -378,8 +378,8 @@ h.set_args(args...); h.parallel_for(nd_range(r, size), k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_grouped(h, r, size, k, args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_grouped(h, r, size, k, args...); })+#. ''' @@ -475,8 +475,8 @@ h.set_args(args...); h.parallel_for(nd_range(r, size), k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_grouped_reduce(h, r, size, k, args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, args...); })+#. ''' @@ -520,8 +520,8 @@ h.set_args(args...); h.parallel_for(k); ---- -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { launch_task(h, k, -args...); })#. +_Effects (2)_: Equivalent to [code]#+q.submit([&](handler& h) { launch_task(h, +k, args...); })+#. 
''' From 8d79af481e2e1c118c63e80980f3c4ca5f76010b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:03:20 +0000 Subject: [PATCH 17/73] Fix grammar: "is" to "are" Co-authored-by: Greg Lueck --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 36dbb91f6..e8bf8d0c6 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -613,7 +613,7 @@ Copies from host to device. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcT# and [code]#DestT# are <>. * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. From 2ba23942e349613db64e21f47f0077a52ca617f6 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:05:30 +0000 Subject: [PATCH 18/73] Fix formatting of bulleted lists --- .../sycl_khr_free_function_commands.adoc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index e8bf8d0c6..ea96e3197 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -566,11 +566,11 @@ _Constraints_: [code]#T# is <>. _Preconditions_: * [code]#src# is a host pointer or a pointer within a USM allocation that is - accessible on the device. + accessible on the device; * [code]#dest# is a host pointer or a pointer within a USM allocation that is - accessible on the device. + accessible on the device; * [code]#src# and [code]#dest# both point to allocations of at least - [code]#count# elements of type [code]#T#. 
+ [code]#count# elements of type [code]#T#; and * If either [code]#src# or [code]#dest# is a pointer is to a USM allocation, that allocation was created from the same context as the handler's queue. @@ -613,13 +613,13 @@ Copies from host to device. _Constraints_: -* [code]#SrcT# and [code]#DestT# are <>. +* [code]#SrcT# and [code]#DestT# are <>; and * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. _Preconditions_: -* [code]#src# is a host pointer. +* [code]#src# is a host pointer; and * [code]#src# points to an allocation of at least as many bytes as the range represented by [code]#dest#. @@ -662,13 +662,13 @@ Copies from device to host. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcT# and [code]#DestT# is <>; and * [code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. _Preconditions_: -* [code]#dest# is a host pointer. +* [code]#dest# is a host pointer; and * [code]#dest# points to an allocation of at least as many bytes as the range represented by [code]#src#. @@ -703,9 +703,9 @@ Copies between two device accessors. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcT# and [code]#DestT# is <>; * [code]#SrcMode# is [code]#access_mode::read# or - [code]#access_mode::read_write#. + [code]#access_mode::read_write#; and * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. 
From e382dbcd7f01d88b35faa2c359f2a234483d9633 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:08:52 +0000 Subject: [PATCH 19/73] Fix more instances of "is" that should be "are" --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ea96e3197..fbc7ff97d 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -662,7 +662,7 @@ Copies from device to host. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>; and +* [code]#SrcT# and [code]#DestT# are <>; and * [code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. @@ -703,7 +703,7 @@ Copies between two device accessors. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>; +* [code]#SrcT# and [code]#DestT# are <>; * [code]#SrcMode# is [code]#access_mode::read# or [code]#access_mode::read_write#; and * [code]#DestMode# is [code]#access_mode::write# or From a9bdc102f37d54ae1cbadb103b67f4c992f7df45 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:10:26 +0000 Subject: [PATCH 20/73] Remove unnecessary device-copyable constraint There is no need to constrain T here because T must be device-copyable in order to construct the accessor passed as an argument. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 -- 1 file changed, 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index fbc7ff97d..5ac212f0c 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -785,8 +785,6 @@ void update_host(sycl::queue q, accessor acc); } ---- -_Constraints_: [code]#T# is <>. - _Effects (1)_: Equivalent to [code]#h.update_host(acc)#. 
_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { update_host(h, From 269706cd5c2255b77204cabdb75d6595a068baa2 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 17:02:12 +0000 Subject: [PATCH 21/73] Remove khr::invocation from free_function_commands Renaming sycl::nd_item is not a necessary part of the API redesign for submitting work, so it should be moved to its own extension. This will also give us more time to consider the design and naming of any proposed replacement(s), including how they should interact with new functionality proposed in other KHRs. --- .../sycl_khr_free_function_commands.adoc | 119 +----------------- 1 file changed, 3 insertions(+), 116 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 5ac212f0c..2e71256a2 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -76,9 +76,9 @@ int main() { // Use khr::launch_grouped to enqueue a kernel using groups without a handler. range<2> local = { 2, 2 }; range<2> global = { N, M }; - khr::launch_grouped(myQueue, global, local, [=](khr::invocation<2> ivc) { - size_t i = ivc.id(0); - size_t j = ivc.id(1); + khr::launch_grouped(myQueue, global, local, [=](sycl::nd_item<2> it) { + size_t i = it.get_global_id(0); + size_t j = it.get_global_id(1); size_t index = i * M + j; c[index] = a[index] + b[index]; }); @@ -874,119 +874,6 @@ operation is a no-op.{endnote} ''' -== [code]#invocation# class template - -The [code]#invocation# class template identifies an invocation of a kernel -function. - -Instances of the [code]#invocation# class template are not user-constructible -and are passed as an argument to each invocation of a kernel function. 
- -[source,role=synopsis] ----- -namespace sycl::khr { - -template -class invocation -{ - public: - static constexpr int dimensions = Dimensions; - - id id() const noexcept; - size_t linear_id() const noexcept; - - range range() const noexcept; - - group get_work_group() const noexcept; - - sub_group get_sub_group() const noexcept; - - // Available for backwards compatibility only. - invocation(nd_item it) noexcept; - operator nd_item() const noexcept; -}; - -} ----- - -.[apidef]#id# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-id] ----- -id id() const noexcept; ----- -_Returns_: The index of this invocation within the kernel dispatch. - -''' - -.[apidef]#linear_id# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-linear_id] ----- -size_t linear_id() const noexcept; ----- -_Returns_: The linearized index (see <>) of this -invocation within the kernel dispatch. - -''' - -.[apidef]#range# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-range] ----- -range range() const noexcept; ----- - -_Returns_: An index space representing all invocations of this kernel. - -''' - -.[apidef]#get_work_group# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_work_group] ----- -group get_work_group() const noexcept; ----- - -_Returns_: A [code]#group# representing the <> to which this -invocation belongs. - -''' - -.[apidef]#get_sub_group# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_sub_group] ----- -sub_group get_sub_group() const noexcept; ----- - -_Returns_: A [code]#sub_group# representing the sub-group to which this -invocation belongs. - -''' - -.[apidef]#invocation constructor# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-invocation-constructor] ----- -invocation(nd_item() it) noexcept; ----- - -_Effects_: Constructs a [code]#invocation# representing the same work-item as -[code]#it#. 
- -{note}This function exists only to provide backwards compatibility with SYCL -2020 code in order to facilitate experimentation with the new interface proposed -by this extension.{endnote} - -''' - -.[apidef]#nd_item conversion operator# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-nd_item-conversion-operator] ----- -operator nd_item() const noexcept; ----- - -_Returns_: An [code]#nd_item# representing this invocation. - -{note}This function exists only to provide backwards compatibility with SYCL -2020 code in order to facilitate experimentation with the new interface proposed -by this extension.{endnote} - == Issues None. From fa8a8f6d60e5d3df80c29974b4083a4ea4aa9445 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 9 Dec 2024 16:46:30 +0000 Subject: [PATCH 22/73] Remove empty issues section --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 2e71256a2..ecf2b1a60 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -873,7 +873,3 @@ other dependencies (e.g., specified by [code]#handler::depends_on#), then this operation is a no-op.{endnote} ''' - -== Issues - -None. 
From db380b4e051510952e31cd6a4bf7231d3954ec76 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 9 Dec 2024 16:53:39 +0000 Subject: [PATCH 23/73] Add missing constraints to fill overloads --- adoc/extensions/sycl_khr_free_function_commands.adoc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ecf2b1a60..b410df5f7 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -760,6 +760,9 @@ void fill(sycl::queue q, } ---- + +_Constraints (1, 3)_: [code]#T# is <>. + _Effects (1)_: Equivalent to [code]#h.fill(ptr, pattern, count)#. _Effects (2)_: Equivalent to [code]#h.fill(dest, src)#. From d26831a0415669dc985a815e3fc89b6f1d34cabb Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 12 Dec 2024 15:26:40 +0000 Subject: [PATCH 24/73] Remove *_reduce functions for kernel objects There are currently no backends that define interop for reductions, so we can remove these functions for now. If we decide later that these functions are necessary, we can release a revision of the KHR. --- .../sycl_khr_free_function_commands.adoc | 92 ------------------- 1 file changed, 92 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index b410df5f7..a373e5657 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -257,49 +257,6 @@ _Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { launch_reduce(h, r, k, reductions...); })+#. -''' - -.[apititle]#launch_reduce# (kernel object) -[source,role=synopsis,id=api:launch_reduce-kernel] ----- -namespace sycl::khr { - -template -void launch_reduce(sycl::handler& h, sycl::range<1> r, - const sycl::kernel& k, Args&&... 
args); (1) - -template -void launch_reduce(sycl::handler& h, sycl::range<2> r, - const sycl::kernel& k, Args&&... args); (2) - -template -void launch_reduce(sycl::handler& h, sycl::range<3> r, - const sycl::kernel& k, Args&&... args); (3) - -template -void launch_reduce(sycl::queue q, sycl::range<1> r, - const sycl::kernel& k, Args&&... args); (4) - -template -void launch_reduce(sycl::queue q, sycl::range<2> r, - const sycl::kernel& k, Args&&... args); (5) - -template -void launch_reduce(sycl::queue q, sycl::range<3> r, - const sycl::kernel& k, Args&&... args); (6) - -} ----- -_Effects (1-3)_: Equivalent to: -[source,c++] ----- -h.set_args(args...); -h.parallel_for(r, k); ----- - -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { -launch_reduce(h, r, k, args...); })+#. - ''' .[apititle]#launch_grouped# (kernel function) @@ -431,55 +388,6 @@ launch_grouped_reduce(h, r, size, k, reductions...); })+#. ''' -.[apititle]#launch_grouped_reduce# (kernel object) -[source,role=synopsis,id=api:launch_grouped_reduce-kernel] ----- -namespace sycl::khr { - -template -void launch_grouped_reduce(sycl::handler& h, sycl::range<1> r, - sycl::range<1> size, const sycl::kernel& k, - Args&&... args); (1) - -template -void launch_grouped_reduce(sycl::handler& h, sycl::range<2> r, - sycl::range<2> size, const sycl::kernel& k, - Args&&... args); (2) - -template -void launch_grouped_reduce(sycl::handler& h, sycl::range<3> r, - sycl::range<3> size, const sycl::kernel& k, - Args&&... args); (3) - -template -void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, - sycl::range<1> size, const sycl::kernel& k, - Args&&... args); (4) - -template -void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, - sycl::range<2> size, const sycl::kernel& k, - Args&&... args); (5) - -template -void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, - sycl::range<3> size, const sycl::kernel& k, - Args&&... 
args); (6) - -} ----- -_Effects (1-3)_: Equivalent to: -[source,c++] ----- -h.set_args(args...); -h.parallel_for(nd_range(r, size), k); ----- - -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { -launch_grouped_reduce(h, r, size, k, args...); })+#. - -''' - .[apititle]#launch_task# (kernel function) [source,role=synopsis,id=api:launch_task] ---- From 75867f7a6595cff2a851977d5f78c6ce64ce1a9b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 17 Jan 2025 13:02:31 +0000 Subject: [PATCH 25/73] Fix copy-paste error in launch_task definition --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index a373e5657..9ca72df7c 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -425,7 +425,7 @@ _Effects (1)_: Equivalent to: [source,c++] ---- h.set_args(args...); -h.parallel_for(k); +h.single_task(k); ---- _Effects (2)_: Equivalent to [code]#+q.submit([&](handler& h) { launch_task(h, From 151f632d82dc43e020aa26e87459a14c6b7312f2 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 20 Jan 2025 10:20:54 +0000 Subject: [PATCH 26/73] Remove unnecessary "is" Co-authored-by: Andrey Alekseenko --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 9ca72df7c..3d16b1749 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -479,7 +479,7 @@ _Preconditions_: accessible on the device; * [code]#src# and [code]#dest# both point to allocations of at least [code]#count# elements of type [code]#T#; and -* If either [code]#src# or [code]#dest# is a pointer is to a USM 
allocation, +* If either [code]#src# or [code]#dest# is a pointer to a USM allocation, that allocation was created from the same context as the handler's queue. _Effects (1)_: Equivalent to [code]#h.copy(src, dest, count)#. From 32d11f570526eae97ec8a3d9b40e3c4666d95f5c Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 20 Jan 2025 10:26:39 +0000 Subject: [PATCH 27/73] Explain potential performance overhead of events --- adoc/extensions/sycl_khr_free_function_commands.adoc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 3d16b1749..e86f7f63f 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -4,6 +4,13 @@ This extension provides an alternative mechanism for submitting commands to a device via free-functions that require developers to opt-in to the creation of [code]#event# objects. +The creation of [code]#event# objects may incur overheads that increase the +latency of submitting commands to devices, even if the [code]#event# object is +immediately discarded and never used. +Requiring developers to opt-in to the creation of events is therefore expected +to improve the performance of many SYCL programs, by ensuring that SYCL +developers only pay the cost associated with using events when necessary. + == Dependencies This extension has no dependencies on other extensions. @@ -479,8 +486,8 @@ _Preconditions_: accessible on the device; * [code]#src# and [code]#dest# both point to allocations of at least [code]#count# elements of type [code]#T#; and -* If either [code]#src# or [code]#dest# is a pointer to a USM allocation, - that allocation was created from the same context as the handler's queue. +* If either [code]#src# or [code]#dest# is a pointer to a USM allocation, that + allocation was created from the same context as the handler's queue. 
_Effects (1)_: Equivalent to [code]#h.copy(src, dest, count)#. From 165d07e13b7cfe379c44fe4da4544ed60ab05e69 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 20 Jan 2025 10:38:02 +0000 Subject: [PATCH 28/73] Add no-op note to command_barrier --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index e86f7f63f..b76e93fb0 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -766,6 +766,10 @@ Any commands submitted after this barrier cannot begin execution until: * All commands associated with this command's dependencies (e.g., via [code]#handler::depends_on#) have completed. +{note}If a [code]#command_barrier# is submitted to an in-order queue and has no +other dependencies (e.g., specified by [code]#handler::depends_on#), then this +operation is a no-op.{endnote} + ''' .[apititle]#event_barrier# From be56bb77ae628ffafbcbd783c251b32aef51e70e Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 20 Jan 2025 10:38:25 +0000 Subject: [PATCH 29/73] Weaken note about no-op from "is" to "may be" --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index b76e93fb0..c98d1102b 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -768,7 +768,7 @@ Any commands submitted after this barrier cannot begin execution until: {note}If a [code]#command_barrier# is submitted to an in-order queue and has no other dependencies (e.g., specified by [code]#handler::depends_on#), then this -operation is a no-op.{endnote} +operation may be a no-op.{endnote} ''' @@ -792,6 +792,6 @@ Any commands 
submitted after this barrier cannot begin execution until: {note}For both overloads, if [code]#events# is empty and an event barrier has no other dependencies (e.g., specified by [code]#handler::depends_on#), then this -operation is a no-op.{endnote} +operation may be a no-op.{endnote} ''' From 9897e6d814480a6a99677811271b96eba5006115 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 22 Jan 2025 15:27:40 +0000 Subject: [PATCH 30/73] Fix copy-paste error in khr::copy Co-authored-by: Nikita Kornev --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index c98d1102b..41d0fce50 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -601,7 +601,7 @@ namespace sycl::khr { template -void copy(sycl::queue q, +void copy(sycl::handler& h, sycl::accessor src, sycl::accessor dest); (1) From b756d9ec3d31de5bfd444e78cd22f42597df31ee Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 23 Jan 2025 15:49:28 +0000 Subject: [PATCH 31/73] Change KHR names to lower case --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 41d0fce50..813a16b33 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1,4 +1,4 @@ -= SYCL_KHR_FREE_FUNCTION_COMMANDS += sycl_khr_free_function_commands This extension provides an alternative mechanism for submitting commands to a device via free-functions that require developers to opt-in to the creation of From e68a7c0f468019f955e12d9c49d89325e2e4fbe1 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 24 Jan 2025 13:20:41 +0000 Subject: [PATCH 
32/73] Remove sycl:: in free-function-command synopses The convention used elsewhere is to omit sycl:: unless it's required for clarity (e.g., to distinguish between sycl:: and std:: classes and/or functions with the same name). --- .../sycl_khr_free_function_commands.adoc | 216 +++++++++--------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 813a16b33..bc16b208a 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -83,7 +83,7 @@ int main() { // Use khr::launch_grouped to enqueue a kernel using groups without a handler. range<2> local = { 2, 2 }; range<2> global = { N, M }; - khr::launch_grouped(myQueue, global, local, [=](sycl::nd_item<2> it) { + khr::launch_grouped(myQueue, global, local, [=](nd_item<2> it) { size_t i = it.get_global_id(0); size_t j = it.get_global_id(1); size_t index = i * M + j; @@ -122,7 +122,7 @@ int main() { namespace sycl::khr { template -void submit(sycl::queue q, CommandGroupFunc&& cgf); +void submit(queue q, CommandGroupFunc&& cgf); } ---- @@ -136,13 +136,13 @@ _Effects_: Equivalent to [code]#q.submit(cgf)#. namespace sycl::khr { template -sycl::event submit_tracked(sycl::queue q, CommandGroupFunc&& cgf); +event submit_tracked(queue q, CommandGroupFunc&& cgf); } ---- _Effects_: Equivalent to [code]#q.submit(cgf)#. -_Returns_: A [code]#sycl::event# associated with the submitted command. +_Returns_: An [code]#event# associated with the submitted command. ''' @@ -154,22 +154,22 @@ _Returns_: A [code]#sycl::event# associated with the submitted command. 
namespace sycl::khr { template -void launch(sycl::handler& h, sycl::range<1> r, const KernelType& k); (1) +void launch(handler& h, range<1> r, const KernelType& k); (1) template -void launch(sycl::handler& h, sycl::range<2> r, const KernelType& k); (2) +void launch(handler& h, range<2> r, const KernelType& k); (2) template -void launch(sycl::handler& h, sycl::range<3> r, const KernelType& k); (3) +void launch(handler& h, range<3> r, const KernelType& k); (3) template -void launch(sycl::queue q, sycl::range<1> r, const KernelType& k); (4) +void launch(queue q, range<1> r, const KernelType& k); (4) template -void launch(sycl::queue q, sycl::range<2> r, const KernelType& k); (5) +void launch(queue q, range<2> r, const KernelType& k); (5) template -void launch(sycl::queue q, sycl::range<3> r, const KernelType& k); (6) +void launch(queue q, range<3> r, const KernelType& k); (6) } ---- @@ -186,28 +186,28 @@ k); })#. namespace sycl::khr { template -void launch(sycl::handler& h, sycl::range<1> r, - const sycl::kernel& k, Args&&... args); (1) +void launch(handler& h, range<1> r, + const kernel& k, Args&&... args); (1) template -void launch(sycl::handler& h, sycl::range<2> r, - const sycl::kernel& k, Args&&... args); (2) +void launch(handler& h, range<2> r, + const kernel& k, Args&&... args); (2) template -void launch(sycl::handler& h, sycl::range<3> r, - const sycl::kernel& k, Args&&... args); (3) +void launch(handler& h, range<3> r, + const kernel& k, Args&&... args); (3) template -void launch(sycl::queue q, sycl::range<1> r, - const sycl::kernel& k, Args&&... args); (4) +void launch(queue q, range<1> r, + const kernel& k, Args&&... args); (4) template -void launch(sycl::queue q, sycl::range<2> r, - const sycl::kernel& k, Args&&... args); (5) +void launch(queue q, range<2> r, + const kernel& k, Args&&... args); (5) template -void launch(sycl::queue q, sycl::range<3> r, - const sycl::kernel& k, Args&&... 
args); (6) +void launch(queue q, range<3> r, + const kernel& k, Args&&... args); (6) } ---- @@ -230,33 +230,33 @@ k, args...); })+#. namespace sycl::khr { template -void launch_reduce(sycl::handler& h, sycl::range<1> r, +void launch_reduce(handler& h, range<1> r, const KernelType& k, Reductions&&... reductions); (1) template -void launch_reduce(sycl::handler& h, sycl::range<2> r, +void launch_reduce(handler& h, range<2> r, const KernelType& k, Reductions&&... reductions); (2) template -void launch_reduce(sycl::handler& h, sycl::range<3> r, +void launch_reduce(handler& h, range<3> r, const KernelType& k, Reductions&&... reductions); (3) template -void launch_reduce(sycl::queue q, sycl::range<1> r, +void launch_reduce(queue q, range<1> r, const KernelType& k, Reductions&&... reductions); (4) template -void launch_reduce(sycl::queue q, sycl::range<2> r, +void launch_reduce(queue q, range<2> r, const KernelType& k, Reductions&&... reductions); (5) template -void launch_reduce(sycl::queue q, sycl::range<3> r, +void launch_reduce(queue q, range<3> r, const KernelType& k, Reductions&&... reductions); (6) } ---- _Constraints_: The parameter pack consists of 1 or more objects created by the -[code]#sycl::reduction# function. +[code]#reduction# function. _Effects (1-3)_: Equivalent to [code]#+h.parallel_for(r, reductions..., k)+#. @@ -272,27 +272,27 @@ launch_reduce(h, r, k, reductions...); })+#. 
namespace sycl::khr { template -void launch_grouped(sycl::handler& h, sycl::range<1> r, sycl::range<1> size, +void launch_grouped(handler& h, range<1> r, range<1> size, const KernelType& k); (1) template -void launch_grouped(sycl::handler& h, sycl::range<2> r, sycl::range<2> size, +void launch_grouped(handler& h, range<2> r, range<2> size, const KernelType& k); (2) template -void launch_grouped(sycl::handler& h, sycl::range<3> r, sycl::range<3> size, +void launch_grouped(handler& h, range<3> r, range<3> size, const KernelType& k); (3) template -void launch_grouped(sycl::queue q, sycl::range<1> r, sycl::range<1> size, +void launch_grouped(queue q, range<1> r, range<1> size, const KernelType& k); (4) template -void launch_grouped(sycl::queue q, sycl::range<2> r, sycl::range<2> size, +void launch_grouped(queue q, range<2> r, range<2> size, const KernelType& k); (5) template -void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, +void launch_grouped(queue q, range<3> r, range<3> size, const KernelType& k); (6) } @@ -310,28 +310,28 @@ launch_grouped(h, r, size, k); })#. namespace sycl::khr { template -void launch_grouped(sycl::handler& h, sycl::range<1> r, sycl::range<1> size, - const sycl::kernel& k, Args&&... args); (1) +void launch_grouped(handler& h, range<1> r, range<1> size, + const kernel& k, Args&&... args); (1) template -void launch_grouped(sycl::handler& h, sycl::range<2> r, sycl::range<2> size, - const sycl::kernel& k, Args&&... args); (2) +void launch_grouped(handler& h, range<2> r, range<2> size, + const kernel& k, Args&&... args); (2) template -void launch_grouped(sycl::handler& h, sycl::range<3> r, sycl::range<3> size, - const sycl::kernel& k, Args&&... args); (3) +void launch_grouped(handler& h, range<3> r, range<3> size, + const kernel& k, Args&&... args); (3) template -void launch_grouped(sycl::queue q, sycl::range<1> r, sycl::range<1> size, - const sycl::kernel& k, Args&&... 
args); (4) +void launch_grouped(queue q, range<1> r, range<1> size, + const kernel& k, Args&&... args); (4) template -void launch_grouped(sycl::queue q, sycl::range<2> r, sycl::range<2> size, - const sycl::kernel& k, Args&&... args); (5) +void launch_grouped(queue q, range<2> r, range<2> size, + const kernel& k, Args&&... args); (5) template -void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, - const sycl::kernel& k, Args&&... args); (6) +void launch_grouped(queue q, range<3> r, range<3> size, + const kernel& k, Args&&... args); (6) } ---- @@ -353,39 +353,39 @@ launch_grouped(h, r, size, k, args...); })+#. namespace sycl::khr { template -void launch_grouped_reduce(sycl::handler& h, sycl::range<1> r, - sycl::range<1> size, const KernelType& k, +void launch_grouped_reduce(handler& h, range<1> r, + range<1> size, const KernelType& k, Reductions&&... reductions); (1) template -void launch_grouped_reduce(sycl::handler& h, sycl::range<2> r, - sycl::range<2> size, const KernelType& k, +void launch_grouped_reduce(handler& h, range<2> r, + range<2> size, const KernelType& k, Reductions&&... reductions); (2) template -void launch_grouped_reduce(sycl::handler& h, sycl::range<3> r, - sycl::range<3> size, const KernelType& k, +void launch_grouped_reduce(handler& h, range<3> r, + range<3> size, const KernelType& k, Reductions&&... reductions); (3) template -void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, - sycl::range<1> size, const KernelType& k, +void launch_grouped_reduce(queue q, range<1> r, + range<1> size, const KernelType& k, Reductions&&... reductions); (4) template -void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, - sycl::range<2> size, const KernelType& k, +void launch_grouped_reduce(queue q, range<2> r, + range<2> size, const KernelType& k, Reductions&&... 
reductions); (5) template -void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, - sycl::range<3> size, const KernelType& k, +void launch_grouped_reduce(queue q, range<3> r, + range<3> size, const KernelType& k, Reductions&&... reductions); (6) } ---- _Constraints_: The parameter pack consists of 1 or more objects created by the -[code]#sycl::reduction# function. +[code]#reduction# function. _Effects (1-3)_: Equivalent to [code]#+h.parallel_for(nd_range(r, size), reductions..., k)+#. @@ -401,10 +401,10 @@ launch_grouped_reduce(h, r, size, k, reductions...); })+#. namespace sycl::khr { template -void launch_task(sycl::handler& h, const KernelType& k); (1) +void launch_task(handler& h, const KernelType& k); (1) template -void launch_task(sycl::queue q, const KernelType& k); (2) +void launch_task(queue q, const KernelType& k); (2) } ---- @@ -421,10 +421,10 @@ k); })#. namespace sycl::khr { template -void launch_task(sycl::handler& h, const sycl::kernel& k, Args&&... args); (1) +void launch_task(handler& h, const kernel& k, Args&&... args); (1) template -void launch_task(sycl::queue q, const sycl::kernel& k, Args&&... args); (2) +void launch_task(queue q, const kernel& k, Args&&... args); (2) } ---- @@ -447,9 +447,9 @@ k, args...); })+#. ---- namespace sycl::khr { -void memcpy(sycl::handler& h, void* dest, const void* src, size_t numBytes); (1) +void memcpy(handler& h, void* dest, const void* src, size_t numBytes); (1) -void memcpy(sycl::queue q, void* dest, const void* src, size_t numBytes); (2) +void memcpy(queue q, void* dest, const void* src, size_t numBytes); (2) } ---- @@ -466,10 +466,10 @@ src, numBytes); })#. 
namespace sycl::khr { template -void copy(sycl::handler& h, const T* src, T* dest, size_t count); (1) +void copy(handler& h, const T* src, T* dest, size_t count); (1) template -void copy(sycl::queue q, const T* src, T* dest, size_t count); (2) +void copy(queue q, const T* src, T* dest, size_t count); (2) } ---- @@ -502,24 +502,24 @@ dest, count); })# namespace sycl::khr { template -void copy(sycl::handler& h, +void copy(handler& h, const SrcT* src, - sycl::accessor dest); (1) + accessor dest); (1) template -void copy(sycl::handler& h, +void copy(handler& h, std::shared_ptr src, - sycl::accessor dest); (2) + accessor dest); (2) template -void copy(sycl::queue q, +void copy(queue q, const SrcT* src, - sycl::accessor dest); (3) + accessor dest); (3) template -void copy(sycl::queue q, +void copy(queue q, std::shared_ptr src, - sycl::accessor dest); (4) + accessor dest); (4) } ---- @@ -551,23 +551,23 @@ dest) })# namespace sycl::khr { template -void copy(sycl::handler& h, - sycl::accessor src, +void copy(handler& h, + accessor src, DestT* dest); (1) template -void copy(sycl::handler& h, - sycl::accessor src, +void copy(handler& h, + accessor src, std::shared_ptr dest); (2) template -void copy(sycl::queue q, - sycl::accessor src, +void copy(queue q, + accessor src, DestT* dest); (3) template -void copy(sycl::queue q, - sycl::accessor src, +void copy(queue q, + accessor src, std::shared_ptr dest); (4) } @@ -601,15 +601,15 @@ namespace sycl::khr { template -void copy(sycl::handler& h, - sycl::accessor src, - sycl::accessor dest); (1) +void copy(handler& h, + accessor src, + accessor dest); (1) template -void copy(sycl::queue q, - sycl::accessor src, - sycl::accessor dest); (1) +void copy(queue q, + accessor src, + accessor dest); (2) } ---- @@ -639,9 +639,9 @@ code if [code]#dest.get_count() < src.get_count()#.
---- namespace sycl::khr { -void memset(sycl::handler& h, void* ptr, int value, size_t numBytes); (1) +void memset(handler& h, void* ptr, int value, size_t numBytes); (1) -void memset(sycl::queue q, void* ptr, int value, size_t numBytes); (2) +void memset(queue q, void* ptr, int value, size_t numBytes); (2) } ---- @@ -658,19 +658,19 @@ numBytes); })#. namespace sycl::khr { template -void fill(sycl::handler& h, T* ptr, const T& pattern, size_t count); (1) +void fill(handler& h, T* ptr, const T& pattern, size_t count); (1) template -void fill(sycl::handler& h, - sycl::accessor dest, +void fill(handler& h, + accessor dest, const T& src); (2) template -void fill(sycl::queue q, T* ptr, const T& pattern, size_t count); (3) +void fill(queue q, T* ptr, const T& pattern, size_t count); (3) template -void fill(sycl::queue q, - sycl::accessor dest, +void fill(queue q, + accessor dest, const T& src); (4) } @@ -696,10 +696,10 @@ src); })#. namespace sycl::khr { template -void update_host(sycl::handler& h, accessor acc); (1) +void update_host(handler& h, accessor acc); (1) template -void update_host(sycl::queue q, accessor acc); (2) +void update_host(queue q, accessor acc); (2) } ---- @@ -715,9 +715,9 @@ acc); })#. ---- namespace sycl::khr { -void prefetch(sycl::handler& h, void* ptr, size_t numBytes); (1) +void prefetch(handler& h, void* ptr, size_t numBytes); (1) -void prefetch(sycl::queue q, void* ptr, size_t numBytes); (2) +void prefetch(queue q, void* ptr, size_t numBytes); (2) } ---- @@ -733,9 +733,9 @@ numBytes); })#. ---- namespace sycl::khr { -void mem_advise(sycl::handler& h, void* ptr, size_t numBytes, int advice); (1) +void mem_advise(handler& h, void* ptr, size_t numBytes, int advice); (1) -void mem_advise(sycl::queue q, void* ptr, size_t numBytes, int advice); (2) +void mem_advise(queue q, void* ptr, size_t numBytes, int advice); (2) } ---- @@ -753,9 +753,9 @@ ptr, numBytes, advice); })#. 
---- namespace sycl::khr { -void command_barrier(sycl::handler& h); (1) +void command_barrier(handler& h); (1) -void command_barrier(sycl::queue q); (2) +void command_barrier(queue q); (2) } ---- @@ -777,9 +777,9 @@ operation may be a no-op.{endnote} ---- namespace sycl::khr { -void event_barrier(sycl::handler& h, const std::vector& events); (1) +void event_barrier(handler& h, const std::vector& events); (1) -void event_barrier(sycl::queue q, const std::vector& events); (2) +void event_barrier(queue q, const std::vector& events); (2) } ---- From 96c101eb886093959d9787942a0eee1fd62c038b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 24 Jan 2025 13:27:47 +0000 Subject: [PATCH 33/73] Add missing require() calls from queue overloads I had incorrectly assumed this would be covered by the "Equivalent to" wording, but we never say "Equivalent to" a queue:: shortcut, always to a handler:: function. --- .../sycl_khr_free_function_commands.adoc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index bc16b208a..6613562c7 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -540,8 +540,8 @@ _Preconditions_: _Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, -dest) })# +_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { +h.require(dest); copy(h, src, dest); })# ''' @@ -589,8 +589,8 @@ _Preconditions_: _Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, -dest); })#. +_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(src); +copy(h, src, dest); })#. ''' @@ -626,8 +626,8 @@ _Constraints_: _Effects (1)_: Equivalent to [code]#h.copy(src, dest)#. 
-_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, -dest); })#. +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(src); +h.require(dest); copy(h, src, dest); })#. _Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error code if [code]#dest.get_count() < src.get_count()#. @@ -685,8 +685,8 @@ _Effects (2)_: Equivalent to [code]#h.fill(dest, src)#. _Effects (3)_: Equivalent to [code]#q.submit([&](handler& h) { fill(h, ptr, pattern, count); })#. -_Effects (4)_: Equivalent to [code]#q.submit([&](handler& h) { fill(h, dest, -src); })#. +_Effects (4)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(dest); +fill(h, dest, src); })#. ''' @@ -705,8 +705,8 @@ void update_host(queue q, accessor acc); (2) ---- _Effects (1)_: Equivalent to [code]#h.update_host(acc)#. -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { update_host(h, -acc); })#. +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(acc); +update_host(h, acc); })#. 
''' From b279f37aa3f7495cd8ee5e90457bfeb2ada66136 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 24 Jan 2025 13:30:26 +0000 Subject: [PATCH 34/73] Fix alignment of overload numbers --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 6613562c7..a0b3b5336 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -663,7 +663,7 @@ void fill(handler& h, T* ptr, const T& pattern, size_t count); (1) template void fill(handler& h, accessor dest, - const T& src); (2) + const T& src); (2) template void fill(queue q, T* ptr, const T& pattern, size_t count); (3) @@ -671,7 +671,7 @@ void fill(queue q, T* ptr, const T& pattern, size_t count); (3) template void fill(queue q, accessor dest, - const T& src); (4) + const T& src); (4) } ---- From d7ce65f434fbc141955a1d583c757547e7e92745 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 24 Jan 2025 15:04:26 +0000 Subject: [PATCH 35/73] Fix parameter pack syntax --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index a0b3b5336..c38cffa19 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -420,10 +420,10 @@ k); })#. ---- namespace sycl::khr { -template +template void launch_task(handler& h, const kernel& k, Args&&... args); (1) -template +template void launch_task(queue q, const kernel& k, Args&&... 
args); (2) } From 19440ec74c4460a3dbc874eb3285def228cec039 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 28 Jan 2025 12:14:49 +0000 Subject: [PATCH 36/73] Add missing ptr parameter to memset --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index c38cffa19..b88faee8b 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -647,8 +647,8 @@ void memset(queue q, void* ptr, int value, size_t numBytes); (2) ---- _Effects (1)_: Equivalent to [code]#h.memset(ptr, value, numBytes)#. -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memset(h, value, -numBytes); })#. +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memset(h, ptr, +value, numBytes); })#. ''' From c63e28725ce8a85cb4eb342ccc191777151d10f7 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 25 Apr 2025 09:19:16 +0100 Subject: [PATCH 37/73] Use const queue& for khr_free_function_commands Using const queue& may allow implementations to improve performance by avoiding creating a copy of the queue. --- .../sycl_khr_free_function_commands.adoc | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index b88faee8b..eaebed2f4 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -122,7 +122,7 @@ int main() { namespace sycl::khr { template -void submit(queue q, CommandGroupFunc&& cgf); +void submit(const queue& q, CommandGroupFunc&& cgf); } ---- @@ -136,7 +136,7 @@ _Effects_: Equivalent to [code]#q.submit(cgf)#.
namespace sycl::khr { template -event submit_tracked(queue q, CommandGroupFunc&& cgf); +event submit_tracked(const queue&, CommandGroupFunc&& cgf); } ---- @@ -163,13 +163,13 @@ template void launch(handler& h, range<3> r, const KernelType& k); (3) template -void launch(queue q, range<1> r, const KernelType& k); (4) +void launch(const queue&, range<1> r, const KernelType& k); (4) template -void launch(queue q, range<2> r, const KernelType& k); (5) +void launch(const queue&, range<2> r, const KernelType& k); (5) template -void launch(queue q, range<3> r, const KernelType& k); (6) +void launch(const queue&, range<3> r, const KernelType& k); (6) } ---- @@ -198,15 +198,15 @@ void launch(handler& h, range<3> r, const kernel& k, Args&&... args); (3) template -void launch(queue q, range<1> r, +void launch(const queue&, range<1> r, const kernel& k, Args&&... args); (4) template -void launch(queue q, range<2> r, +void launch(const queue&, range<2> r, const kernel& k, Args&&... args); (5) template -void launch(queue q, range<3> r, +void launch(const queue&, range<3> r, const kernel& k, Args&&... args); (6) } @@ -242,15 +242,15 @@ void launch_reduce(handler& h, range<3> r, const KernelType& k, Reductions&&... reductions); (3) template -void launch_reduce(queue q, range<1> r, +void launch_reduce(const queue&, range<1> r, const KernelType& k, Reductions&&... reductions); (4) template -void launch_reduce(queue q, range<2> r, +void launch_reduce(const queue&, range<2> r, const KernelType& k, Reductions&&... reductions); (5) template -void launch_reduce(queue q, range<3> r, +void launch_reduce(const queue&, range<3> r, const KernelType& k, Reductions&&... 
reductions); (6) } @@ -284,15 +284,15 @@ void launch_grouped(handler& h, range<3> r, range<3> size, const KernelType& k); (3) template -void launch_grouped(queue q, range<1> r, range<1> size, +void launch_grouped(const queue&, range<1> r, range<1> size, const KernelType& k); (4) template -void launch_grouped(queue q, range<2> r, range<2> size, +void launch_grouped(const queue&, range<2> r, range<2> size, const KernelType& k); (5) template -void launch_grouped(queue q, range<3> r, range<3> size, +void launch_grouped(const queue&, range<3> r, range<3> size, const KernelType& k); (6) } @@ -322,15 +322,15 @@ void launch_grouped(handler& h, range<3> r, range<3> size, const kernel& k, Args&&... args); (3) template -void launch_grouped(queue q, range<1> r, range<1> size, +void launch_grouped(const queue&, range<1> r, range<1> size, const kernel& k, Args&&... args); (4) template -void launch_grouped(queue q, range<2> r, range<2> size, +void launch_grouped(const queue&, range<2> r, range<2> size, const kernel& k, Args&&... args); (5) template -void launch_grouped(queue q, range<3> r, range<3> size, +void launch_grouped(const queue&, range<3> r, range<3> size, const kernel& k, Args&&... args); (6) } @@ -368,17 +368,17 @@ void launch_grouped_reduce(handler& h, range<3> r, Reductions&&... reductions); (3) template -void launch_grouped_reduce(queue q, range<1> r, +void launch_grouped_reduce(const queue&, range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (4) template -void launch_grouped_reduce(queue q, range<2> r, +void launch_grouped_reduce(const queue&, range<2> r, range<2> size, const KernelType& k, Reductions&&... reductions); (5) template -void launch_grouped_reduce(queue q, range<3> r, +void launch_grouped_reduce(const queue&, range<3> r, range<3> size, const KernelType& k, Reductions&&... 
reductions); (6) @@ -404,7 +404,7 @@ template void launch_task(handler& h, const KernelType& k); (1) template -void launch_task(queue q, const KernelType& k); (2) +void launch_task(const queue&, const KernelType& k); (2) } ---- @@ -424,7 +424,7 @@ template void launch_task(handler& h, const kernel& k, Args&&... args); (1) template -void launch_task(queue q, const kernel& k, Args&&... args); (2) +void launch_task(const queue&, const kernel& k, Args&&... args); (2) } ---- @@ -449,7 +449,7 @@ namespace sycl::khr { void memcpy(handler& h, void* dest, const void* src, size_t numBytes); (1) -void memcpy(queue q, void* dest, const void* src, size_t numBytes); (2) +void memcpy(const queue&, void* dest, const void* src, size_t numBytes); (2) } ---- @@ -469,7 +469,7 @@ template void copy(handler& h, const T* src, T* dest, size_t count); (1) template -void copy(queue q, const T* src, T* dest, size_t count); (2) +void copy(const queue&, const T* src, T* dest, size_t count); (2) } ---- @@ -512,12 +512,12 @@ void copy(handler& h, accessor dest); (2) template -void copy(queue q, +void copy(const queue&, const SrcT* src, accessor dest); (3) template -void copy(queue q, +void copy(const queue&, std::shared_ptr src, accessor dest); (4) @@ -561,12 +561,12 @@ void copy(handler& h, std::shared_ptr dest); (2) template -void copy(queue q, +void copy(const queue&, accessor src, DestT* dest); (3) template -void copy(queue q, +void copy(const queue&, accessor src, std::shared_ptr dest); (4) @@ -607,7 +607,7 @@ void copy(handler& h, template -void copy(queue q, +void copy(const queue&, accessor src, accessor dest); (1) @@ -641,7 +641,7 @@ namespace sycl::khr { void memset(handler& h, void* ptr, int value, size_t numBytes); (1) -void memset(queue q, void* ptr, int value, size_t numBytes); (2) +void memset(const queue&, void* ptr, int value, size_t numBytes); (2) } ---- @@ -666,10 +666,10 @@ void fill(handler& h, const T& src); (2) template -void fill(queue q, T* ptr, const T& pattern, size_t 
count); (3) +void fill(const queue&, T* ptr, const T& pattern, size_t count); (3) template -void fill(queue q, +void fill(const queue&, accessor dest, const T& src); (4) @@ -699,7 +699,7 @@ template void update_host(handler& h, accessor acc); (1) template -void update_host(queue q, accessor acc); (2) +void update_host(const queue&, accessor acc); (2) } ---- @@ -717,7 +717,7 @@ namespace sycl::khr { void prefetch(handler& h, void* ptr, size_t numBytes); (1) -void prefetch(queue q, void* ptr, size_t numBytes); (2) +void prefetch(const queue&, void* ptr, size_t numBytes); (2) } ---- @@ -735,7 +735,7 @@ namespace sycl::khr { void mem_advise(handler& h, void* ptr, size_t numBytes, int advice); (1) -void mem_advise(queue q, void* ptr, size_t numBytes, int advice); (2) +void mem_advise(const queue&, void* ptr, size_t numBytes, int advice); (2) } ---- @@ -755,7 +755,7 @@ namespace sycl::khr { void command_barrier(handler& h); (1) -void command_barrier(queue q); (2) +void command_barrier(const queue&); (2) } ---- @@ -779,7 +779,7 @@ namespace sycl::khr { void event_barrier(handler& h, const std::vector& events); (1) -void event_barrier(queue q, const std::vector& events); (2) +void event_barrier(const queue&, const std::vector& events); (2) } ---- From b314b770cd0ccd78ac290523dfbecfda0244d30c Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 25 Apr 2025 09:22:49 +0100 Subject: [PATCH 38/73] Fix alignment of (1), (2) in khr_free_functions Changes to names and cv-qualifiers resulted in inconsistent spacing. 
--- .../sycl_khr_free_function_commands.adoc | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index eaebed2f4..11fe40a0e 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -154,22 +154,22 @@ _Returns_: An [code]#event# associated with the submitted command. namespace sycl::khr { template -void launch(handler& h, range<1> r, const KernelType& k); (1) +void launch(handler& h, range<1> r, const KernelType& k); (1) template -void launch(handler& h, range<2> r, const KernelType& k); (2) +void launch(handler& h, range<2> r, const KernelType& k); (2) template -void launch(handler& h, range<3> r, const KernelType& k); (3) +void launch(handler& h, range<3> r, const KernelType& k); (3) template -void launch(const queue&, range<1> r, const KernelType& k); (4) +void launch(const queue&, range<1> r, const KernelType& k); (4) template -void launch(const queue&, range<2> r, const KernelType& k); (5) +void launch(const queue&, range<2> r, const KernelType& k); (5) template -void launch(const queue&, range<3> r, const KernelType& k); (6) +void launch(const queue&, range<3> r, const KernelType& k); (6) } ---- @@ -401,10 +401,10 @@ launch_grouped_reduce(h, r, size, k, reductions...); })+#. namespace sycl::khr { template -void launch_task(handler& h, const KernelType& k); (1) +void launch_task(handler& h, const KernelType& k); (1) template -void launch_task(const queue&, const KernelType& k); (2) +void launch_task(const queue&, const KernelType& k); (2) } ---- @@ -421,10 +421,10 @@ k); })#. namespace sycl::khr { template -void launch_task(handler& h, const kernel& k, Args&&... args); (1) +void launch_task(handler& h, const kernel& k, Args&&... args); (1) template -void launch_task(const queue&, const kernel& k, Args&&... 
args); (2) +void launch_task(const queue&, const kernel& k, Args&&... args); (2) } ---- @@ -447,9 +447,9 @@ k, args...); })+#. ---- namespace sycl::khr { -void memcpy(handler& h, void* dest, const void* src, size_t numBytes); (1) +void memcpy(handler& h, void* dest, const void* src, size_t numBytes); (1) -void memcpy(const queue&, void* dest, const void* src, size_t numBytes); (2) +void memcpy(const queue&, void* dest, const void* src, size_t numBytes); (2) } ---- @@ -466,10 +466,10 @@ src, numBytes); })#. namespace sycl::khr { template -void copy(handler& h, const T* src, T* dest, size_t count); (1) +void copy(handler& h, const T* src, T* dest, size_t count); (1) template -void copy(const queue&, const T* src, T* dest, size_t count); (2) +void copy(const queue&, const T* src, T* dest, size_t count); (2) } ---- @@ -609,7 +609,7 @@ template void copy(const queue&, accessor src, - accessor dest); (1) + accessor dest); (2) } ---- @@ -639,9 +639,9 @@ code if [code]#dest.get_count() < src.get_count()#. ---- namespace sycl::khr { -void memset(handler& h, void* ptr, int value, size_t numBytes); (1) +void memset(handler& h, void* ptr, int value, size_t numBytes); (1) -void memset(const queue&, void* ptr, int value, size_t numBytes); (2) +void memset(const queue&, void* ptr, int value, size_t numBytes); (2) } ---- @@ -666,12 +666,12 @@ void fill(handler& h, const T& src); (2) template -void fill(const queue&, T* ptr, const T& pattern, size_t count); (3) +void fill(const queue&, T* ptr, const T& pattern, size_t count); (3) template void fill(const queue&, accessor dest, - const T& src); (4) + const T& src); (4) } ---- @@ -696,10 +696,10 @@ fill(h, dest, src); })#. namespace sycl::khr { template -void update_host(handler& h, accessor acc); (1) +void update_host(handler& h, accessor acc); (1) template -void update_host(const queue&, accessor acc); (2) +void update_host(const queue&, accessor acc); (2) } ---- @@ -715,9 +715,9 @@ update_host(h, acc); })#. 
---- namespace sycl::khr { -void prefetch(handler& h, void* ptr, size_t numBytes); (1) +void prefetch(handler& h, void* ptr, size_t numBytes); (1) -void prefetch(const queue&, void* ptr, size_t numBytes); (2) +void prefetch(const queue&, void* ptr, size_t numBytes); (2) } ---- @@ -733,9 +733,9 @@ numBytes); })#. ---- namespace sycl::khr { -void mem_advise(handler& h, void* ptr, size_t numBytes, int advice); (1) +void mem_advise(handler& h, void* ptr, size_t numBytes, int advice); (1) -void mem_advise(const queue&, void* ptr, size_t numBytes, int advice); (2) +void mem_advise(const queue&, void* ptr, size_t numBytes, int advice); (2) } ---- @@ -753,9 +753,9 @@ ptr, numBytes, advice); })#. ---- namespace sycl::khr { -void command_barrier(handler& h); (1) +void command_barrier(handler& h); (1) -void command_barrier(const queue&); (2) +void command_barrier(const queue&); (2) } ---- @@ -777,9 +777,9 @@ operation may be a no-op.{endnote} ---- namespace sycl::khr { -void event_barrier(handler& h, const std::vector& events); (1) +void event_barrier(handler& h, const std::vector& events); (1) -void event_barrier(const queue&, const std::vector& events); (2) +void event_barrier(const queue&, const std::vector& events); (2) } ---- From f1c9607bd5c2f6afb47c5c6f89ba99f78fb80850 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 29 Apr 2025 08:47:53 +0100 Subject: [PATCH 39/73] Add new line between extensions Co-authored-by: Pablo Reble --- adoc/extensions/index.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/index.adoc b/adoc/extensions/index.adoc index f341d9a7d..ca8996552 100644 --- a/adoc/extensions/index.adoc +++ b/adoc/extensions/index.adoc @@ -10,6 +10,6 @@ specification, but their design is subject to change. 
// leveloffset=2 allows extensions to be written as standalone documents // include::sycl_khr_extension_name.adoc[leveloffset=2] - include::sycl_khr_default_context.adoc[leveloffset=2] + include::sycl_khr_free_function_commands.adoc[leveloffset=2] From caf3b0ac54794ba150ff9e925e494ef096100ca5 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 19 May 2025 09:23:10 +0100 Subject: [PATCH 40/73] Forbid calling anything after a free function This restriction potentially improves performance by giving implementations the freedom to submit work immediately where possible. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 11fe40a0e..7d47bb5f5 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -114,6 +114,13 @@ int main() { == New free functions +Whenever one of the free functions in this section is used to associate a +command with a [code]#handler#, the free function must be the last operation +performed on the [code]#handler# before returning from the enclosing call to +[code]#submit# or [code]#submit_tracked#. +Calling any of the member functions of [code]#handler# after calling one of the +free functions in this section results in undefined behavior. + === Command-groups .[apititle]#submit# From ae4037998322f94e35e59f2f6a4b7fefb9a2b863 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 27 May 2025 09:23:27 +0100 Subject: [PATCH 41/73] Add comments with references to old APIs khr_free_function_commands renames several of the old enqueue APIs. The comments added in this commit are intended to help reviewers, and will not be visible in the specification. 
--- .../sycl_khr_free_function_commands.adoc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 7d47bb5f5..295cfb8e1 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -123,6 +123,7 @@ free functions in this section results in undefined behavior. === Command-groups +// Submit without creating an event. .[apititle]#submit# [source,role=synopsis,id=api:submit] ---- @@ -137,6 +138,7 @@ _Effects_: Equivalent to [code]#q.submit(cgf)#. ''' +// Submit and create an event. .[apititle]#submit_tracked# [source,role=synopsis,id=api:submit_tracked] ---- @@ -155,6 +157,8 @@ _Returns_: An [code]#event# associated with the submitted command. === Kernel launch +// Launch a basic parallel_for with a function object. +// New form of queue::parallel_for(range, ...) .[apititle]#launch# (kernel function) [source,role=synopsis,id=api:launch] ---- @@ -187,6 +191,8 @@ k); })#. ''' +// Launch a basic parallel_for with a sycl::kernel object. +// New form of handler::parallel_for(range, ...) without set_args. .[apititle]#launch# (kernel object) [source,role=synopsis,id=api:launch-kernel] ---- @@ -231,6 +237,8 @@ k, args...); })+#. ''' +// Launch a basic parallel_for with a function object and reductions. +// New form of parallel_for(range, reduction, ...) .[apititle]#launch_reduce# (kernel function) [source,role=synopsis,id=api:launch_reduce] ---- @@ -273,6 +281,8 @@ launch_reduce(h, r, k, reductions...); })+#. ''' +// Launch an ND-range parallel_for with a function object. +// New form of parallel_for(nd_range, ...) .[apititle]#launch_grouped# (kernel function) [source,role=synopsis,id=api:launch_grouped] ---- @@ -311,6 +321,8 @@ launch_grouped(h, r, size, k); })#. ''' +// Launch an ND-range parallel_for with a sycl::kernel object. +// New form of parallel_for(nd_range, ...) 
without set_args. .[apititle]#launch_grouped# (kernel object) [source,role=synopsis,id=api:launch_grouped-kernel] ---- @@ -354,6 +366,8 @@ launch_grouped(h, r, size, k, args...); })+#. ''' +// Launch an ND-range parallel_for with a function object and reductions. +// New form of parallel_for(nd_range, ...) .[apititle]#launch_grouped_reduce# (kernel function) [source,role=synopsis,id=api:launch_grouped_reduce] ---- @@ -402,6 +416,8 @@ launch_grouped_reduce(h, r, size, k, reductions...); })+#. ''' +// Launch a single work-item with a function object. +// New form of single_task(...) .[apititle]#launch_task# (kernel function) [source,role=synopsis,id=api:launch_task] ---- @@ -422,6 +438,8 @@ k); })#. ''' +// Launch a single work-item with a sycl::kernel object. +// New form of single_task(...) without set_args. .[apititle]#launch_task# (kernel object) [source,role=synopsis,id=api:launch_task-kernel] ---- From a9289febc809cc38ee55ab8ba379576792ba7b22 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 3 Jun 2025 09:05:06 +0100 Subject: [PATCH 42/73] Revert "Forbid calling anything after a free function" This reverts commit caf3b0ac54794ba150ff9e925e494ef096100ca5. After discussion, the SYCL WG decided that this was too error-prone. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 295cfb8e1..797f51e50 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -114,13 +114,6 @@ int main() { == New free functions -Whenever one of the free functions in this section is used to associate a -command with a [code]#handler#, the free function must be the last operation -performed on the [code]#handler# before returning from the enclosing call to -[code]#submit# or [code]#submit_tracked#. 
-Calling any of the member functions of [code]#handler# after calling one of the -free functions in this section results in undefined behavior. - === Command-groups // Submit without creating an event. From 1e3737b4e55ad274798c542e6143c1327c832365 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 3 Jun 2025 15:10:31 +0100 Subject: [PATCH 43/73] Remove all handler overloads A more detailed investigation of performance overheads in SYCL implementations has uncovered that the cost associated with using a handler is similar to the cost associated with returning a sycl::event. This commit removes all the handler overloads from the KHR, as a first step towards introducing an alternative design that does not depend on handler. --- .../sycl_khr_free_function_commands.adoc | 513 +++++++----------- 1 file changed, 201 insertions(+), 312 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 797f51e50..9134e9beb 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1,15 +1,17 @@ = sycl_khr_free_function_commands This extension provides an alternative mechanism for submitting commands to a -device via free-functions that require developers to opt-in to the creation of -[code]#event# objects. +device via free-functions that require developers to opt-in to the handling of +requirements and the creation of [code]#event# objects. The creation of [code]#event# objects may incur overheads that increase the latency of submitting commands to devices, even if the [code]#event# object is immediately discarded and never used. -Requiring developers to opt-in to the creation of events is therefore expected -to improve the performance of many SYCL programs, by ensuring that SYCL -developers only pay the cost associated with using events when necessary. 
+Similarly, using a [code]#handler# can incur overheads even when there are no +dependent events or accessors registered. +Requiring developers to opt-in to these features is therefore expected to +improve the performance of many SYCL programs, by ensuring that SYCL developers +only pay the associated costs when necessary. == Dependencies @@ -55,20 +57,13 @@ int main() { float* c = malloc_shared(N * M, myQueue); // Launch an asynchronous kernel to initialize a. - // Use khr::submit to enqueue a kernel via a handler. - khr::submit(myQueue, [&](handler& cgh) { - - // Enqueue a kernel iterating on a N*M 2D iteration space. - khr::launch(cgh, range<2> { N, M }, [=](id<2> index) { - size_t i = index[0]; - size_t j = index[1]; - a[i * M + j] = i * 2 + j; - }); - + khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { + size_t i = index[0]; + size_t j = index[1]; + a[i * M + j] = i * 2 + j; }); // Launch an asynchronous kernel to initialize b. - // Use khr::launch to enqueue a kernel without a handler. khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { size_t i = index[0]; size_t j = index[1]; @@ -80,7 +75,6 @@ int main() { khr::command_barrier(myQueue); // Launch an asynchronous kernel to compute matrix addition c = a + b. - // Use khr::launch_grouped to enqueue a kernel using groups without a handler. range<2> local = { 2, 2 }; range<2> global = { N, M }; khr::launch_grouped(myQueue, global, local, [=](nd_item<2> it) { @@ -114,40 +108,6 @@ int main() { == New free functions -=== Command-groups - -// Submit without creating an event. -.[apititle]#submit# -[source,role=synopsis,id=api:submit] ----- -namespace sycl::khr { - -template -void submit(const queue&, CommandGroupFunc&& cgf); - -} ----- -_Effects_: Equivalent to [code]#q.submit(cgf)#. - -''' - -// Submit and create an event. 
-.[apititle]#submit_tracked# -[source,role=synopsis,id=api:submit_tracked] ----- -namespace sycl::khr { - -template -event submit_tracked(const queue&, CommandGroupFunc&& cgf); - -} ----- -_Effects_: Equivalent to [code]#q.submit(cgf)#. - -_Returns_: An [code]#event# associated with the submitted command. - -''' - === Kernel launch // Launch a basic parallel_for with a function object. @@ -158,29 +118,25 @@ _Returns_: An [code]#event# associated with the submitted command. namespace sycl::khr { template -void launch(handler& h, range<1> r, const KernelType& k); (1) - -template -void launch(handler& h, range<2> r, const KernelType& k); (2) - -template -void launch(handler& h, range<3> r, const KernelType& k); (3) - -template -void launch(const queue&, range<1> r, const KernelType& k); (4) +void launch(const queue&, range<1> r, const KernelType& k); (1) template -void launch(const queue&, range<2> r, const KernelType& k); (5) +void launch(const queue&, range<2> r, const KernelType& k); (2) template -void launch(const queue&, range<3> r, const KernelType& k); (6) +void launch(const queue&, range<3> r, const KernelType& k); (3) } ---- -_Effects (1-3)_: Equivalent to [code]#h.parallel_for(r, k)#. -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { launch(h, r, -k); })#. +_Effects_: Equivalent to: + +[source,c++] +---- +q.submit([&](handler& h) { + h.parallel_for(r, k); +}); +---- ''' @@ -192,42 +148,29 @@ k); })#. namespace sycl::khr { template -void launch(handler& h, range<1> r, - const kernel& k, Args&&... args); (1) - -template -void launch(handler& h, range<2> r, - const kernel& k, Args&&... args); (2) - -template -void launch(handler& h, range<3> r, - const kernel& k, Args&&... args); (3) - -template -void launch(const queue&, range<1> r, - const kernel& k, Args&&... args); (4) +void launch(const queue&, range<1> r, const kernel& k, + Args&&... args); (1) template -void launch(const queue&, range<2> r, - const kernel& k, Args&&... 
args); (5) +void launch(const queue&, range<2> r, const kernel& k, + Args&&... args); (2) template -void launch(const queue&, range<3> r, - const kernel& k, Args&&... args); (6) +void launch(const queue&, range<3> r, const kernel& k, + Args&&... args); (3) } ---- -_Effects (1-3)_: Equivalent to: +_Effects_: Equivalent to: [source,c++] ---- -h.set_args(args...); -h.parallel_for(r, k); +q.submit([&](handler& h) { + h.set_args(args...); + h.parallel_for(r, k); +}); ---- -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { launch(h, r, -k, args...); })+#. - ''' // Launch a basic parallel_for with a function object and reductions. @@ -237,40 +180,31 @@ k, args...); })+#. ---- namespace sycl::khr { -template -void launch_reduce(handler& h, range<1> r, - const KernelType& k, Reductions&&... reductions); (1) - -template -void launch_reduce(handler& h, range<2> r, - const KernelType& k, Reductions&&... reductions); (2) - -template -void launch_reduce(handler& h, range<3> r, - const KernelType& k, Reductions&&... reductions); (3) - template void launch_reduce(const queue&, range<1> r, - const KernelType& k, Reductions&&... reductions); (4) + const KernelType& k, Reductions&&... reductions); (1) template void launch_reduce(const queue&, range<2> r, - const KernelType& k, Reductions&&... reductions); (5) + const KernelType& k, Reductions&&... reductions); (2) template void launch_reduce(const queue&, range<3> r, - const KernelType& k, Reductions&&... reductions); (6) + const KernelType& k, Reductions&&... reductions); (3) } ---- _Constraints_: The parameter pack consists of 1 or more objects created by the [code]#reduction# function. -_Effects (1-3)_: Equivalent to [code]#+h.parallel_for(r, reductions..., k)+#. - -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { -launch_reduce(h, r, k, reductions...); })+#. 
+_Effects_: Equivalent to: +[source,c++] +---- +q.submit([&](handler& h) { + h.parallel_for(r, reductions..., k); +}); +---- ''' @@ -282,35 +216,27 @@ launch_reduce(h, r, k, reductions...); })+#. namespace sycl::khr { template -void launch_grouped(handler& h, range<1> r, range<1> size, - const KernelType& k); (1) +void launch_grouped(const queue&, range<1> r, + range<1> size, const KernelType& k); (1) template -void launch_grouped(handler& h, range<2> r, range<2> size, - const KernelType& k); (2) +void launch_grouped(const queue&, range<2> r, + range<2> size, const KernelType& k); (2) template -void launch_grouped(handler& h, range<3> r, range<3> size, - const KernelType& k); (3) - -template -void launch_grouped(const queue&, range<1> r, range<1> size, - const KernelType& k); (4) - -template -void launch_grouped(const queue&, range<2> r, range<2> size, - const KernelType& k); (5) - -template -void launch_grouped(const queue&, range<3> r, range<3> size, - const KernelType& k); (6) +void launch_grouped(const queue&, range<3> r, + range<3> size, const KernelType& k); (3) } ---- -_Effects (1-3)_: Equivalent to [code]#h.parallel_for(nd_range(r, size), k)#. +_Effects_: Equivalent to: -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_grouped(h, r, size, k); })#. +[source,c++] +---- +q.submit([&](handler& h) { + h.parallel_for(nd_range(r, size), k); +}); +---- ''' @@ -321,42 +247,29 @@ launch_grouped(h, r, size, k); })#. ---- namespace sycl::khr { -template -void launch_grouped(handler& h, range<1> r, range<1> size, - const kernel& k, Args&&... args); (1) - -template -void launch_grouped(handler& h, range<2> r, range<2> size, - const kernel& k, Args&&... args); (2) - -template -void launch_grouped(handler& h, range<3> r, range<3> size, - const kernel& k, Args&&... args); (3) - template void launch_grouped(const queue&, range<1> r, range<1> size, - const kernel& k, Args&&... args); (4) + const kernel& k, Args&&... 
args); (1) template void launch_grouped(const queue&, range<2> r, range<2> size, - const kernel& k, Args&&... args); (5) + const kernel& k, Args&&... args); (2) template void launch_grouped(const queue&, range<3> r, range<3> size, - const kernel& k, Args&&... args); (6) + const kernel& k, Args&&... args); (3) } ---- -_Effects (1-3)_: Equivalent to: +_Effects_: Equivalent to: [source,c++] ---- -h.set_args(args...); -h.parallel_for(nd_range(r, size), k); +q.submit([&](handler& h) { + h.set_args(args...); + h.parallel_for(nd_range(r, size), k); +}); ---- -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { -launch_grouped(h, r, size, k, args...); })+#. - ''' // Launch an ND-range parallel_for with a function object and reductions. @@ -367,45 +280,33 @@ launch_grouped(h, r, size, k, args...); })+#. namespace sycl::khr { template -void launch_grouped_reduce(handler& h, range<1> r, - range<1> size, const KernelType& k, +void launch_grouped_reduce(const queue&, + range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (1) template -void launch_grouped_reduce(handler& h, range<2> r, - range<2> size, const KernelType& k, +void launch_grouped_reduce(const queue&, + range<2> r, range<2> size, const KernelType& k, Reductions&&... reductions); (2) template -void launch_grouped_reduce(handler& h, range<3> r, - range<3> size, const KernelType& k, +void launch_grouped_reduce(const queue&, + range<3> r, range<3> size, const KernelType& k, Reductions&&... reductions); (3) -template -void launch_grouped_reduce(const queue&, range<1> r, - range<1> size, const KernelType& k, - Reductions&&... reductions); (4) - -template -void launch_grouped_reduce(const queue&, range<2> r, - range<2> size, const KernelType& k, - Reductions&&... reductions); (5) - -template -void launch_grouped_reduce(const queue&, range<3> r, - range<3> size, const KernelType& k, - Reductions&&... 
reductions); (6) - } ---- _Constraints_: The parameter pack consists of 1 or more objects created by the [code]#reduction# function. -_Effects (1-3)_: Equivalent to [code]#+h.parallel_for(nd_range(r, size), -reductions..., k)+#. +_Effects_: Equivalent to: -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { -launch_grouped_reduce(h, r, size, k, reductions...); })+#. +[source,c++] +---- +q.submit([&](handler& h) { + h.parallel_for(nd_range(r, size), reductions..., k); +}); +---- ''' @@ -417,17 +318,19 @@ launch_grouped_reduce(h, r, size, k, reductions...); })+#. namespace sycl::khr { template -void launch_task(handler& h, const KernelType& k); (1) - -template -void launch_task(const queue&, const KernelType& k); (2) +void launch_task(const queue&, const KernelType& k); } ---- -_Effects (1)_: Equivalent to [code]#h.single_task(k)#. -_Effects (2)_: Equivalent to [code]#h.submit([&](handler& h) { launch_task(h, -k); })#. +_Effects_: Equivalent to: + +[source,c++] +---- +q.submit([&](handler& h) { + h.single_task(k); +}); +---- ''' @@ -439,23 +342,19 @@ k); })#. namespace sycl::khr { template -void launch_task(handler& h, const kernel& k, Args&&... args); (1) - -template -void launch_task(const queue&, const kernel& k, Args&&... args); (2) +void launch_task(const queue&, const kernel& k, Args&&... args); } ---- -_Effects (1)_: Equivalent to: +_Effects_: Equivalent to: [source,c++] ---- -h.set_args(args...); -h.single_task(k); +q.submit([&](handler& h) { + h.set_args(args...); + h.single_task(k); +}); ---- -_Effects (2)_: Equivalent to [code]#+q.submit([&](handler& h) { launch_task(h, -k, args...); })+#. - ''' === Memory operations @@ -465,16 +364,19 @@ k, args...); })+#. 
---- namespace sycl::khr { -void memcpy(handler& h, void* dest, const void* src, size_t numBytes); (1) - -void memcpy(const queue&, void* dest, const void* src, size_t numBytes); (2) +void memcpy(const queue&, void* dest, const void* src, size_t numBytes); } ---- -_Effects (1)_: Equivalent to [code]#h.memcpy(dest, src, numBytes)#. -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memcpy(h, dest, -src, numBytes); })#. +_Effects_: Equivalent to: + +[source,c++] +---- +q.submit([&](handler& h) { + h.memcpy(dest, src, numBytes); +}); +---- ''' @@ -484,10 +386,7 @@ src, numBytes); })#. namespace sycl::khr { template -void copy(handler& h, const T* src, T* dest, size_t count); (1) - -template -void copy(const queue&, const T* src, T* dest, size_t count); (2) +void copy(const queue& q, const T* src, T* dest, size_t count); } ---- @@ -505,12 +404,16 @@ _Preconditions_: * [code]#src# and [code]#dest# both point to allocations of at least [code]#count# elements of type [code]#T#; and * If either [code]#src# or [code]#dest# is a pointer to a USM allocation, that - allocation was created from the same context as the handler's queue. + allocation was created from the context associated with [code]#q#. -_Effects (1)_: Equivalent to [code]#h.copy(src, dest, count)#.
+_Effects_: Equivalent to: -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, -dest, count); })# +[source,c++] +---- +q.submit([&](handler& h) { + h.copy(src, dest, count); +}); +---- ''' @@ -520,25 +423,13 @@ dest, count); })# namespace sycl::khr { template -void copy(handler& h, - const SrcT* src, +void copy(const queue&, const SrcT* src, accessor dest); (1) template -void copy(handler& h, - std::shared_ptr src, +void copy(const queue&, std::shared_ptr src, accessor dest); (2) -template -void copy(const queue&, - const SrcT* src, - accessor dest); (3) - -template -void copy(const queue&, - std::shared_ptr src, - accessor dest); (4) - } ---- @@ -556,10 +447,15 @@ _Preconditions_: * [code]#src# points to an allocation of at least as many bytes as the range represented by [code]#dest#. -_Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects_: Equivalent to: -_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { -h.require(dest); copy(h, src, dest); })# +[source,c++] +---- +q.submit([&](handler& h) { + h.require(dest); + h.copy(src, dest); +}); +---- ''' @@ -568,26 +464,16 @@ h.require(dest); copy(h, src, dest); })# ---- namespace sycl::khr { -template -void copy(handler& h, +template void + copy(const queue&, accessor src, DestT* dest); (1) -template -void copy(handler& h, +template void + copy(const queue&, accessor src, std::shared_ptr dest); (2) -template -void copy(const queue&, - accessor src, - DestT* dest); (3) - -template -void copy(const queue&, - accessor src, - std::shared_ptr dest); (4) - } ---- @@ -605,10 +491,15 @@ _Preconditions_: * [code]#dest# points to an allocation of at least as many bytes as the range represented by [code]#src#. -_Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects_: Equivalent to: -_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(src); -copy(h, src, dest); })#. 
+[source,c++] +---- +q.submit([&](handler& h) { + h.require(src); + h.copy(src, dest); +}); +---- ''' @@ -617,17 +508,10 @@ copy(h, src, dest); })#. ---- namespace sycl::khr { -template -void copy(handler& h, - accessor src, - accessor dest); (1) - -template -void copy(const queue&, - accessor src, - accessor dest); (2) +template +void copy(const queue&, accessor src, + accessor dest); } ---- @@ -642,10 +526,16 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. -_Effects (1)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects_: Equivalent to: -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(src); -h.require(dest); copy(h, src, dest); })#. +[source,c++] +---- +q.submit([&](handler& h) { + h.require(src); + h.require(dest); + h.copy(src, dest); +}); +---- _Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error code if [code]#dest.get_count() < src.get_count()#. @@ -657,16 +547,18 @@ code if [code]#dest.get_count() < src.get_count()#. ---- namespace sycl::khr { -void memset(handler& h, void* ptr, int value, size_t numBytes); (1) - -void memset(const queue&, void* ptr, int value, size_t numBytes); (2) +void memset(const queue&, void* ptr, int value, size_t numBytes); } ---- -_Effects (1)_: Equivalent to [code]#h.memset(ptr, value, numBytes)#. +_Effects_: Equivalent to: -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memset(h, ptr, -value, numBytes); })#. +[source,c++] +---- +q.submit([&](handler& h) { + h.memset(ptr, value, numBytes); +}); +---- ''' @@ -676,35 +568,34 @@ value, numBytes); })#. 
namespace sycl::khr { template -void fill(handler& h, T* ptr, const T& pattern, size_t count); (1) - -template -void fill(handler& h, - accessor dest, - const T& src); (2) - -template -void fill(const queue&, T* ptr, const T& pattern, size_t count); (3) +void fill(const queue&, T* ptr, const T& pattern, size_t count); (1) template void fill(const queue&, - accessor dest, - const T& src); (4) + accessor dest, const T& src); (2) } ---- -_Constraints (1, 3)_: [code]#T# is <>. +_Constraints (1)_: [code]#T# is <>. -_Effects (1)_: Equivalent to [code]#h.fill(ptr, pattern, count)#. +_Effects (1)_: Equivalent to: -_Effects (2)_: Equivalent to [code]#h.fill(dest, src)#. +[source,c++] +---- +q.submit([&](handler& h) { + h.fill(ptr, pattern, count); +}); +---- -_Effects (3)_: Equivalent to [code]#q.submit([&](handler& h) { fill(h, ptr, -pattern, count); })#. +_Effects (2)_: Equivalent to: -_Effects (4)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(dest); -fill(h, dest, src); })#. +[source,c++] +---- +q.submit([&](handler& h) { + h.fill(dest, src); +}); +---- ''' @@ -714,17 +605,19 @@ fill(h, dest, src); })#. namespace sycl::khr { template -void update_host(handler& h, accessor acc); (1) - -template -void update_host(const queue&, accessor acc); (2) +void update_host(const queue&, accessor acc); } ---- -_Effects (1)_: Equivalent to [code]#h.update_host(acc)#. +_Effects_: Equivalent to: -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { h.require(acc); -update_host(h, acc); })#. +[source,c++] +---- +q.submit([&](handler& h) { + h.require(acc); + h.update_host(acc); +}); +---- ''' @@ -733,16 +626,18 @@ update_host(h, acc); })#. ---- namespace sycl::khr { -void prefetch(handler& h, void* ptr, size_t numBytes); (1) - -void prefetch(const queue&, void* ptr, size_t numBytes); (2) +void prefetch(const queue&, void* ptr, size_t numBytes); } ---- -_Effects (1)_: Equivalent to [code]#h.prefetch(ptr, numBytes)#. 
+_Effects_: Equivalent to: -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { prefetch(h, ptr, -numBytes); })#. +[source,c++] +---- +q.submit([&](handler& h) { + h.prefetch(ptr, numBytes); +}); +---- ''' @@ -751,16 +646,18 @@ numBytes); })#. ---- namespace sycl::khr { -void mem_advise(handler& h, void* ptr, size_t numBytes, int advice); (1) - -void mem_advise(const queue&, void* ptr, size_t numBytes, int advice); (2) +void mem_advise(const queue&, void* ptr, size_t numBytes, int advice); } ---- -_Effects (1)_: Equivalent to [code]#h.mem_advise(ptr, numBytes, advice)#. +_Effects_: Equivalent to: -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { mem_advise(h, -ptr, numBytes, advice); })#. +[source,c++] +---- +q.submit([&](handler& h) { + h.mem_advise(ptr, numBytes, advice); +}); +---- ''' @@ -771,9 +668,7 @@ ptr, numBytes, advice); })#. ---- namespace sycl::khr { -void command_barrier(handler& h); (1) - -void command_barrier(const queue&); (2) +void command_barrier(const queue&); } ---- @@ -781,12 +676,10 @@ _Effects_: Enqueues a command barrier. Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and -* All commands associated with this command's dependencies (e.g., via - [code]#handler::depends_on#) have completed. +* All commands associated with this command's dependencies have completed. 
{note}If a [code]#command_barrier# is submitted to an in-order queue and has no -other dependencies (e.g., specified by [code]#handler::depends_on#), then this -operation may be a no-op.{endnote} +other dependencies, then this operation may be a no-op.{endnote} ''' @@ -795,9 +688,7 @@ operation may be a no-op.{endnote} ---- namespace sycl::khr { -void event_barrier(handler& h, const std::vector& events); (1) - -void event_barrier(const queue&, const std::vector& events); (2) +void event_barrier(const queue&, const std::vector& events); } ---- @@ -805,11 +696,9 @@ _Effects_: Enqueues an event barrier. Any commands submitted after this barrier cannot begin execution until: * All commands associated with [code]#events# have completed; and -* All commands associated with this command's dependencies (e.g., via - [code]#handler::depends_on#) have completed. +* All commands associated with this command's dependencies have completed. {note}For both overloads, if [code]#events# is empty and an event barrier has no -other dependencies (e.g., specified by [code]#handler::depends_on#), then this -operation may be a no-op.{endnote} +other dependencies, then this operation may be a no-op.{endnote} ''' From f9ca360a5b5400a6cb8828038d6f6c4961e30381 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 4 Jun 2025 11:25:48 +0100 Subject: [PATCH 44/73] Add khr::requirements class and overloads An instance of the requirements class represents all of the scheduling requirements that must be satisfied when submitting a command. It acts as a replacement for handler that delivers two main improvements: 1) All requirements are captured at once (by the requirements constructor), allowing the presence or absence of specific requirements to be detected at compile-time. 2) All requirements are passed as an argument to the command function, allowing the command function to enqueue work immediately. 
There are some existing APIs that were dependent on handler that do not yet have a requirements-based equivalent, including: - local memory - specialization constants - kernel bundles --- adoc/config/rouge/lib/rouge/lexers/sycl.rb | 9 + .../rouge/lib/rouge/themes/sycl_spec.rb | 3 +- .../sycl_khr_free_function_commands.adoc | 527 +++++++++++++----- 3 files changed, 384 insertions(+), 155 deletions(-) diff --git a/adoc/config/rouge/lib/rouge/lexers/sycl.rb b/adoc/config/rouge/lib/rouge/lexers/sycl.rb index d8e3ca6cd..46e3902c4 100644 --- a/adoc/config/rouge/lib/rouge/lexers/sycl.rb +++ b/adoc/config/rouge/lib/rouge/lexers/sycl.rb @@ -436,6 +436,13 @@ class Sycl < Cpp replace_me # Replace with list of actual keywords ) + # Exposition-only identifiers + sycl_exposition_only = %w( + register-events + register-accessors + has-tracking + ) + # Here are some interesting tokens # https://pygments.org/docs/tokens/ unused in C++ we can reuse # in SYCL mode: @@ -466,6 +473,8 @@ class Sycl < Cpp # Insert some specific rules at the beginning of the statement # rule of the C++ lexer prepend :statements do + rule %r/(?:#{sycl_exposition_only.join('|')})\b/, + Generic::Emph rule %r/(?:#{sycl_data_types.join('|')})\b/, Keyword::Pseudo rule %r/(?:#{sycl_functions.join('|')})\b/, diff --git a/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb b/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb index 95a8c34f7..b5e5d2129 100644 --- a/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb +++ b/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb @@ -33,7 +33,8 @@ class SYCLspec < Github style Comment::Single, :fg => '#9acd32' # Use a clearer white background style Text, :bg => '#ffffff' - + # Render exposition-only functions in italics to match ISO C++ + style Generic::Emph, :fg => '#000000', :italic => true end end end diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 9134e9beb..55bc1c3bd 100644 --- 
a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -106,6 +106,131 @@ int main() { } ---- +== Requirements + +An instance of the [code]#requirements# class template encapsulates all of the +requirements for scheduling a command. + +[source,role=synopsis] +---- +namespace sycl::khr { + +template +struct is_requirement; + +template +static constexpr bool is_requirement_v = is_requirement::value; + +template +class requirements { +public: + // Only available if: all Requirements satisfy is_requirement_v + requirements(Requirements... values); + + void register-accessors(handler& h) const; // exposition only + void register-events(handler& h) const; // exposition only + bool has-tracking() const; // exposition only +}; + +class tracking { +public: + tracking(bool enabled=true); +}; + +} +---- + +Each instance of a type listed below defines a specific scheduling requirement. +For each type, the [code]#is_requirement# type trait is specialized such that +[code]#is_requirement_v# returns [code]#true#. + +* [code]#event#: The command must not begin executing until the event is + complete. + +* [code]#std::vector#: The command must not begin executing until all + events in the vector are complete. + +* [code]#accessor#: The command must not begin executing until the + [code]#buffer# associated with the [code]#accessor# can be accessed in a + manner compatible with the specified [code]#access_mode#. + +* [code]#tracking#: The command must be submitted such that its state can be + tracked via an [code]#event# when the [code]#tracking# object is constructed + with an [code]#enabled# value of [code]#true#. + +''' + +.[apititle]#Constructor# +[source,role=synopsis,id=api:requirements-ctor] +---- +template +requirements(Requirements... values); +---- + +_Constraints_: [code]#is_requirement_v# returns [code]#true# for each type in +[code]#Requirements#. 
+ +_Effects_: Constructs a [code]#requirements# object representing the set of +requirements specified via the [code]#values# parameter pack. + +_Remarks_: If an instance of a requirement appears more than once in the +[code]#values# parameter pack, the [code]#requirements# object behaves as if it +had only been specified once. + +''' + +.[apititle]#requirements::register-accessors# +[source,role=synopsis,id=api:register-accessors] +---- +void register-accessors(handler& h) const; // exposition only +---- + +_Effects_: Calls [code]#h.require# for each [code]#accessor# passed to the +constructor of this [code]#requirements# object. + +''' + +.[apititle]#requirements::register-events# +[source,role=synopsis,id=api:register-events] +---- +void register-events(handler& h) const; // exposition only +---- + +_Effects_: Calls [code]#h.depends_on# for each [code]#event# or +[code]#std::vector# passed to the constructor of this +[code]#requirements# object. + +''' + +.[apititle]#requirements::has-tracking# +[source,role=synopsis,id=api:has-tracking] +---- +bool has-tracking() const; // exposition only +---- + +_Returns_: [code]#true# if this [code]#requirements# object was constructed with +a [code]#tracking# object with tracking enabled, and [code]#false# otherwise. + +''' + +.[apititle]#tracking# constructor +[source,role=synopsis,id=api:tracking-ctor] +---- +namespace sycl::khr { + +tracking(bool enabled=true); + +} +---- + +_Effects_: Constructs a [code]#tracking# object, representing a requirement that +a command must be submitted such that its state can be tracked via an +[code]#event# when [code]#enabled# is [code]#true#. 
+ +{note}If an [code]#event# is _not_ required, [code]#tracking(false)# should be +expected to introduce a small amount of overhead compared to providing no +[code]#tracking# requirement.{endnote} + == New free functions === Kernel launch @@ -117,27 +242,40 @@ int main() { ---- namespace sycl::khr { +template +std::optional launch(const queue& q, range<1> r, const requirements& reqs, const KernelType& k); (1) + +template +std::optional launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2) + +template +std::optional launch(const queue& q, range<3> r, const requirements& reqs, const KernelType& k); (3) + template -void launch(const queue&, range<1> r, const KernelType& k); (1) +std::optional launch(const queue& q, range<1> r, const KernelType& k); (4) template -void launch(const queue&, range<2> r, const KernelType& k); (2) +std::optional launch(const queue& q, range<2> r, const KernelType& k); (5) template -void launch(const queue&, range<3> r, const KernelType& k); (3) +std::optional launch(const queue& q, range<3> r, const KernelType& k); (6) } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_ (1-3): Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.parallel_for(r, k); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- +_Effects_ (4-6): Equivalent to: [code]#return launch(q, r, {}, k);#. + ''' // Launch a basic parallel_for with a sycl::kernel object. @@ -147,30 +285,40 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { +template +std::optional launch(const queue& q, range<1> r, const requirements& reqs, const kernel& k, Args&&... args); (1) + +template +std::optional launch(const queue& q, range<2> r, const requirements& reqs, const kernel& k, Args&&... args); (2) + +template +std::optional launch(const queue& q, range<3> r, const requirements& reqs, const kernel& k, Args&&... 
args); (3) + template -void launch(const queue&, range<1> r, const kernel& k, - Args&&... args); (1) +std::optional launch(const queue& q, range<1> r, const kernel& k, Args&&... args); (4) template -void launch(const queue&, range<2> r, const kernel& k, - Args&&... args); (2) +std::optional launch(const queue& q, range<2> r, const kernel& k, Args&&... args); (5) template -void launch(const queue&, range<3> r, const kernel& k, - Args&&... args); (3) +std::optional launch(const queue& q, range<3> r, const kernel& k, Args&&... args); (6) } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_ (1-3): Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.set_args(args...); h.parallel_for(r, k); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- +_Effects_ (4-6): Equivalent to: [code]#+return launch(q, r, {}, k, args...);+#. + ''' // Launch a basic parallel_for with a function object and reductions. @@ -180,32 +328,43 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { +template +std::optional launch_reduce(const queue& q, range<1> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) + +template +std::optional launch_reduce(const queue& q, range<2> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) + +template +std::optional launch_reduce(const queue& q, range<3> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) + template -void launch_reduce(const queue&, range<1> r, - const KernelType& k, Reductions&&... reductions); (1) +std::optional launch_reduce(const queue& q, range<1> r, const KernelType& k, Reductions&&... reductions); (4) template -void launch_reduce(const queue&, range<2> r, - const KernelType& k, Reductions&&... reductions); (2) +std::optional launch_reduce(const queue& q, range<2> r, const KernelType& k, Reductions&&... 
reductions); (5) template -void launch_reduce(const queue&, range<3> r, - const KernelType& k, Reductions&&... reductions); (3) +std::optional launch_reduce(const queue& q, range<3> r, const KernelType& k, Reductions&&... reductions); (6) } ---- _Constraints_: The parameter pack consists of 1 or more objects created by the [code]#reduction# function. -_Effects_: Equivalent to: - -[source,c++] +_Effects_ (1-3): Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.parallel_for(r, reductions..., k); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- +_Effects_ (4-6): Equivalent to [code]#+return launch_reduce(q, r, {}, k, +reductions...);+#. + ''' // Launch an ND-range parallel_for with a function object. @@ -215,29 +374,40 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { +template +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k); (1) + +template +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k); (2) + +template +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k); (3) + template -void launch_grouped(const queue&, range<1> r, - range<1> size, const KernelType& k); (1) +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const KernelType& k); (4) template -void launch_grouped(const queue&, range<2> r, - range<2> size, const KernelType& k); (2) +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const KernelType& k); (5) template -void launch_grouped(const queue&, range<3> r, - range<3> size, const KernelType& k); (3) +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const KernelType& k); (6) } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_ (1-3): Equivalent to: + 
+[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.parallel_for(nd_range(r, size), k); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- +_Effects_ (4-6): Equivalent to [code]#return launch_grouped(q, r, size, {}, +k);#. + ''' // Launch an ND-range parallel_for with a sycl::kernel object. @@ -247,29 +417,41 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { +template +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const kernel& k, Args&&... args); (1) + +template +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const kernel& k, Args&&... args); (2) + +template +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const kernel& k, Args&&... args); (3) + template -void launch_grouped(const queue&, range<1> r, range<1> size, - const kernel& k, Args&&... args); (1) +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const kernel& k, Args&&... args); (4) template -void launch_grouped(const queue&, range<2> r, range<2> size, - const kernel& k, Args&&... args); (2) +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (5) template -void launch_grouped(const queue&, range<3> r, range<3> size, - const kernel& k, Args&&... args); (3) +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const kernel& k, Args&&... args); (6) } ---- -_Effects_: Equivalent to: -[source,c++] +_Effects_ (1-3): Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.set_args(args...); h.parallel_for(nd_range(r, size), k); }); +return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- +_Effects_ (4-6): Equivalent to: [code]#+return launch_grouped(q, r, size, {}, k, +args...);+#. + ''' // Launch an ND-range parallel_for with a function object and reductions. @@ -279,35 +461,43 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { +template +std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) + +template +std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) + +template +std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) + template -void launch_grouped_reduce(const queue&, - range<1> r, range<1> size, const KernelType& k, - Reductions&&... reductions); (1) +std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (4) template -void launch_grouped_reduce(const queue&, - range<2> r, range<2> size, const KernelType& k, - Reductions&&... reductions); (2) +std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const KernelType& k, Reductions&&... reductions); (5) template -void launch_grouped_reduce(const queue&, - range<3> r, range<3> size, const KernelType& k, - Reductions&&... reductions); (3) +std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const KernelType& k, Reductions&&... reductions); (6) } ---- _Constraints_: The parameter pack consists of 1 or more objects created by the [code]#reduction# function. 
-_Effects_: Equivalent to: - -[source,c++] +_Effects_ (1-3): Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.parallel_for(nd_range(r, size), reductions..., k); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- +_Effects_ (4-6): Equivalent to [code]#+return launch_grouped_reduce(q, r, size, +{}, k, reductions...);+#. + ''' // Launch a single work-item with a function object. @@ -317,21 +507,28 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { +template +std::optional launch_task(const queue& q, const requirements& reqs, const KernelType& k); (1) + template -void launch_task(const queue&, const KernelType& k); +std::optional launch_task(const queue& q, const KernelType& k); (2) } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_ (1): Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.single_task(k); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- +_Effects_ (2): Equivalent to [code]#return launch_task(q, {}, k);#. + ''' // Launch a single work-item with a sycl::kernel object. @@ -341,20 +538,28 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { +template +std::optional launch_task(const queue& q, const requirements& reqs, const kernel& k, Args&&... args); (1) + template -void launch_task(const queue&, const kernel& k, Args&&... args); +std::optional launch_task(const queue& q, const kernel& k, Args&&... args); (2) } ---- -_Effects_: Equivalent to: -[source,c++] +_Effects_ (1): Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.set_args(args...); h.single_task(k); }); +return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- +_Effects_ (2): Equivalent to [code]#+return launch_task(q, {}, k, args...);+#. + ''' === Memory operations @@ -364,18 +569,21 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -void memcpy(const queue&, void* dest, const void* src, size_t numBytes); +template +std::optional memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.memcpy(dest, src, numBytes); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -385,8 +593,8 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -template -void copy(const queue& q, const T* src, T* dest, size_t count); +template +std::optional copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); } ---- @@ -406,13 +614,15 @@ _Preconditions_: * If either [code]#src# or [code]#dest# is a pointer to a USM allocation, that allocation was created from the same context associated with [code]#q#. -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.copy(src, dest, count); }); +return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- ''' @@ -422,13 +632,11 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -template -void copy(const queue&, const SrcT* src, - accessor dest); (1) +template +std::optional copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) -template -void copy(const queue&, std::shared_ptr src, - accessor dest); (2) +template +std::optional copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) } ---- @@ -447,14 +655,16 @@ _Preconditions_: * [code]#src# points to an allocation of at least as many bytes as the range represented by [code]#dest#. -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.require(dest); h.copy(src, dest); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -464,15 +674,11 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -template void - copy(const queue&, - accessor src, - DestT* dest); (1) +template +std::optional copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) -template void - copy(const queue&, - accessor src, - std::shared_ptr dest); (2) +template +std::optional copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) } ---- @@ -491,14 +697,16 @@ _Preconditions_: * [code]#dest# points to an allocation of at least as many bytes as the range represented by [code]#src#. -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.require(src); h.copy(src, dest); }); +return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- ''' @@ -508,10 +716,8 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -template -void copy(const queue&, accessor src, - accessor dest); +template +std::optional copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); } ---- @@ -526,11 +732,12 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.require(src); h.require(dest); h.copy(src, dest); @@ -547,17 +754,20 @@ code if [code]#dest.get_count() < src.get_count()#. ---- namespace sycl::khr { -void memset(const queue&, void* ptr, int value, size_t numBytes); +template +std::optional memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.memset(ptr, value, numBytes); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -567,34 +777,35 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -template -void fill(const queue&, T* ptr, const T& pattern, size_t count); (1) +template +std::optional fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) -template -void fill(const queue&, - accessor dest, const T& src); (2) +template +std::optional fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) } ---- _Constraints (1)_: [code]#T# is <>. 
-_Effects (1)_: Equivalent to: - -[source,c++] +_Effects (1)_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.fill(ptr, pattern, count); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects (2)_: Equivalent to: - -[source,c++] +_Effects (2)_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { h.fill(dest, src); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -604,19 +815,21 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -template -void update_host(const queue&, accessor acc); +template +std::optional update_host(const queue& q, accessor acc, const requirements& reqs = {}); } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.require(acc); h.update_host(acc); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -626,17 +839,20 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -void prefetch(const queue&, void* ptr, size_t numBytes); +template +std::optional prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.prefetch(ptr, numBytes); }); +return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- ''' @@ -646,17 +862,20 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -void mem_advise(const queue&, void* ptr, size_t numBytes, int advice); +template +std::optional mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); } ---- -_Effects_: Equivalent to: - -[source,c++] +_Effects_: Equivalent to: + +[source,sycl] ---- -q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.mem_advise(ptr, numBytes, advice); }); +return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -668,7 +887,8 @@ q.submit([&](handler& h) { ---- namespace sycl::khr { -void command_barrier(const queue&); +template +std::optional command_barrier(const queue& q, const requirements& reqs = {}); } ---- @@ -676,10 +896,10 @@ _Effects_: Enqueues a command barrier. Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and -* All commands associated with this command's dependencies have completed. +* All requirements in [code]#reqs# are satisfied. -{note}If a [code]#command_barrier# is submitted to an in-order queue and has no -other dependencies, then this operation may be a no-op.{endnote} +{note}If a [code]#command_barrier# is submitted to an in-order queue with no +requirements, then this operation may be a no-op.{endnote} ''' @@ -688,17 +908,16 @@ other dependencies, then this operation may be a no-op.{endnote} ---- namespace sycl::khr { -void event_barrier(const queue&, const std::vector& events); +template +std::optional event_barrier(const queue& q, const requirements& reqs = {}); } ---- _Effects_: Enqueues an event barrier. -Any commands submitted after this barrier cannot begin execution until: - -* All commands associated with [code]#events# have completed; and -* All commands associated with this command's dependencies have completed. 
+Any commands submitted after this barrier cannot begin execution until all +requirements in [code]#reqs# are satisfied. -{note}For both overloads, if [code]#events# is empty and an event barrier has no -other dependencies, then this operation may be a no-op.{endnote} +{note}If an [code]#event_barrier# is submitted with no requirements, then this +operation may be a no-op.{endnote} ''' From 3057215aa8da8ea9e509fa4256c5a46cb8b48f37 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 13 Jun 2025 14:57:31 +0100 Subject: [PATCH 45/73] Allow kernel_bundle as a requirement Adding a kernel_bundle as a requirement should have the same effect as calling handler::use_kernel_bundle. handler::use_kernel_bundle cannot be used in conjunction with commands accepting a kernel object, and the command is defined as ignoring the kernel_bundle in that case. Since the new API receives the command and all requirements simultaneously, and the requirements are known at compile-time, we can instead use a Constraint to ensure that such code doesn't compile. --- adoc/config/rouge/lib/rouge/lexers/sycl.rb | 1 + .../sycl_khr_free_function_commands.adoc | 60 ++++++++++++++++--- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/adoc/config/rouge/lib/rouge/lexers/sycl.rb b/adoc/config/rouge/lib/rouge/lexers/sycl.rb index 46e3902c4..4883cb29a 100644 --- a/adoc/config/rouge/lib/rouge/lexers/sycl.rb +++ b/adoc/config/rouge/lib/rouge/lexers/sycl.rb @@ -440,6 +440,7 @@ class Sycl < Cpp sycl_exposition_only = %w( register-events register-accessors + register-kernel-bundle has-tracking ) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 55bc1c3bd..ce4b9d660 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -127,9 +127,10 @@ public: // Only available if: all Requirements satisfy is_requirement_v requirements(Requirements... 
values); - void register-accessors(handler& h) const; // exposition only - void register-events(handler& h) const; // exposition only - bool has-tracking() const; // exposition only + void register-accessors(handler& h) const; // exposition only + void register-events(handler& h) const; // exposition only + void register-kernel-bundle(handler& h) const; // exposition only + bool has-tracking() const; // exposition only }; class tracking { @@ -158,6 +159,9 @@ For each type, the [code]#is_requirement# type trait is specialized such that tracked via an [code]#event# when the [code]#tracking# object is constructed with an [code]#enabled# value of [code]#true#. +* [code]#kernel_bundle#: The command must be submitted + using a <> from the kernel bundle. + ''' .[apititle]#Default constructor# @@ -167,15 +171,18 @@ template requirements(Requirements... values); ---- -_Constraints_: [code]#is_requirement_v# returns [code]#true# for each type in -[code]#Requirements#. +_Constraints_: +* [code]#is_requirement_v# returns [code]#true# for each type in + [code]#Requirements#; and +* [code]#Requirements# contains at most one + [code]#kernel_bundle#. _Effects_: Constructs a [code]#requirements# object representing the set of requirements specified via the [code]#values# parameter pack. -_Remarks_: If an instance of a requirement appears more than once in the -[code]#values# parameter pack, the [code]#requirements# object behaves as if it -had only been specified once. +_Remarks_: Unless otherwise specified, if an instance of a requirement appears +more than once in the [code]#values# parameter pack, the [code]#requirements# +object behaves as if it had only been specified once. 
''' @@ -202,6 +209,18 @@ _Effects_: Calls [code]#h.depends_on# for each [code]#event# or ''' +.[apititle]#requirements::register-kernel-bundle# +[source,role=synopsis,id=api:register-kernel-bundle] +---- +void register-kernel-bundle(handler& h) const; // exposition only +---- + +_Effects_: Calls [code]#h.use_kernel_bundle# if a [code]#kernel_bundle# in +executable state was passed to the constructor of this [code]#requirements# +object and has no effect otherwise. + +''' + .[apititle]#requirements::has-tracking# [source,role=synopsis,id=api:has-tracking] ---- @@ -269,6 +288,7 @@ _Effects_ (1-3): Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.parallel_for(r, k); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -305,6 +325,10 @@ std::optional launch(const queue& q, range<3> r, const kernel& k, Args&&. } ---- + +_Constraints_ (1-3): [code]#Requirements# does not contain a +[code]#kernel_bundle#. + _Effects_ (1-3): Equivalent to: + [source,sycl] ---- @@ -357,6 +381,7 @@ _Effects_ (1-3): Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.parallel_for(r, reductions..., k); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -400,6 +425,7 @@ _Effects_ (1-3): Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.parallel_for(nd_range(r, size), k); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -437,6 +463,9 @@ std::optional launch_grouped(const queue& q, range<2> r, range<2> size, c } ---- +_Constraints_ (1-3): [code]#Requirements# does not contain a +[code]#kernel_bundle#. 
+ _Effects_ (1-3): Equivalent to: + [source,sycl] ---- @@ -490,6 +519,7 @@ _Effects_ (1-3): Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.parallel_for(nd_range(r, size), reductions..., k); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -522,6 +552,7 @@ _Effects_ (1): Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.single_task(k); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -546,6 +577,9 @@ std::optional launch_task(const queue& q, const kernel& k, Args&&... args } ---- +_Constraints_ (1): [code]#Requirements# does not contain a +[code]#kernel_bundle#. + _Effects_ (1): Equivalent to: + [source,sycl] ---- @@ -581,6 +615,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.memcpy(dest, src, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -620,6 +655,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.copy(src, dest, count); }); return (reqs.has-tracking()) ? 
ev : std::nullopt; @@ -661,6 +697,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.require(dest); h.copy(src, dest); }); @@ -703,6 +740,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.require(src); h.copy(src, dest); }); @@ -738,6 +776,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.require(src); h.require(dest); h.copy(src, dest); @@ -765,6 +804,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.memset(ptr, value, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -794,6 +834,7 @@ _Effects (1)_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.fill(ptr, pattern, count); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -826,6 +867,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.require(acc); h.update_host(acc); }); @@ -850,6 +892,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.prefetch(ptr, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -873,6 +916,7 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); + reqs.register-kernel-bundle(h); h.mem_advise(ptr, numBytes, advice); }); return (reqs.has-tracking()) ? 
ev : std::nullopt; From 3f66bb48e182c9226fb3b749fc184014f9c6fb20 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 17 Jun 2025 07:13:39 +0100 Subject: [PATCH 46/73] Use "status" instead of "state" for events This is consistent with info::event_command_status. Co-authored-by: Gordon Brown --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ce4b9d660..a18a02d46 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -155,7 +155,7 @@ For each type, the [code]#is_requirement# type trait is specialized such that [code]#buffer# associated with the [code]#accessor# can be accessed in a manner compatible with the specified [code]#access_mode#. -* [code]#tracking#: The command must be submitted such that its state can be +* [code]#tracking#: The command must be submitted such that its status can be tracked via an [code]#event# when the [code]#tracking# object is constructed with an [code]#enabled# value of [code]#true#. From bbd090e405c17bddab3bef1b9bb1a0058bb5ba03 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 17 Jun 2025 07:31:48 +0100 Subject: [PATCH 47/73] Forbid requirements with multiple tracking objects Although we could limit errors to the {tracking(true), tracking(false)} case, this would have to be deferred until runtime. Ensuring that each requirements object contains only one tracking requirement is simpler and less error-prone. 
--- adoc/extensions/sycl_khr_free_function_commands.adoc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index a18a02d46..f67d0fd4b 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -173,9 +173,10 @@ requirements(Requirements... values); _Constraints_: * [code]#is_requirement_v# returns [code]#true# for each type in - [code]#Requirements#; and + [code]#Requirements#; * [code]#Requirements# contains at most one - [code]#kernel_bundle#. + [code]#kernel_bundle#; and +* [code]#Requirements# contains at most one [code]#tracking#. _Effects_: Constructs a [code]#requirements# object representing the set of requirements specified via the [code]#values# parameter pack. From 5edf35d2a5f37f674e8852295c5f2ac8f7b1d01a Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 17 Jun 2025 20:14:11 +0100 Subject: [PATCH 48/73] Add blank line before bulleted list --- adoc/extensions/sycl_khr_free_function_commands.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index f67d0fd4b..48445a358 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -172,6 +172,7 @@ requirements(Requirements... values); ---- _Constraints_: + * [code]#is_requirement_v# returns [code]#true# for each type in [code]#Requirements#; * [code]#Requirements# contains at most one From 44ed92c92d523d30f122ebe48f2ccfe49fcff052 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 17 Jun 2025 20:18:19 +0100 Subject: [PATCH 49/73] Expand Requirements... 
parameter pack correctly --- .../sycl_khr_free_function_commands.adoc | 110 +++++++++--------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 48445a358..911d4b648 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -264,22 +264,22 @@ expected to introduce a small amount of overhead compared to providing no namespace sycl::khr { template -std::optional launch(const queue& q, range<1> r, const requirements& reqs, const KernelType& k); (1) +std::optional launch(const queue& q, range<1> r, const requirements& reqs, const KernelType& k); (1) template -std::optional launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2) +std::optional launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2) template -std::optional launch(const queue& q, range<3> r, const requirements& reqs, const KernelType& k); (3) +std::optional launch(const queue& q, range<3> r, const requirements& reqs, const KernelType& k); (3) template -std::optional launch(const queue& q, range<1> r, const KernelType& k); (4) +std::optional launch(const queue& q, range<1> r, const KernelType& k); (4) template -std::optional launch(const queue& q, range<2> r, const KernelType& k); (5) +std::optional launch(const queue& q, range<2> r, const KernelType& k); (5) template -std::optional launch(const queue& q, range<3> r, const KernelType& k); (6) +std::optional launch(const queue& q, range<3> r, const KernelType& k); (6) } ---- @@ -308,22 +308,22 @@ _Effects_ (4-6): Equivalent to: [code]#return launch(q, r, {}, k);#. namespace sycl::khr { template -std::optional launch(const queue& q, range<1> r, const requirements& reqs, const kernel& k, Args&&... 
args); (1) +std::optional launch(const queue& q, range<1> r, const requirements& reqs, const kernel& k, Args&&... args); (1) template -std::optional launch(const queue& q, range<2> r, const requirements& reqs, const kernel& k, Args&&... args); (2) +std::optional launch(const queue& q, range<2> r, const requirements& reqs, const kernel& k, Args&&... args); (2) template -std::optional launch(const queue& q, range<3> r, const requirements& reqs, const kernel& k, Args&&... args); (3) +std::optional launch(const queue& q, range<3> r, const requirements& reqs, const kernel& k, Args&&... args); (3) template -std::optional launch(const queue& q, range<1> r, const kernel& k, Args&&... args); (4) +std::optional launch(const queue& q, range<1> r, const kernel& k, Args&&... args); (4) template -std::optional launch(const queue& q, range<2> r, const kernel& k, Args&&... args); (5) +std::optional launch(const queue& q, range<2> r, const kernel& k, Args&&... args); (5) template -std::optional launch(const queue& q, range<3> r, const kernel& k, Args&&... args); (6) +std::optional launch(const queue& q, range<3> r, const kernel& k, Args&&... args); (6) } ---- @@ -355,22 +355,22 @@ _Effects_ (4-6): Equivalent to: [code]#+return launch(q, r, {}, k, args...);+#. namespace sycl::khr { template -std::optional launch_reduce(const queue& q, range<1> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) +std::optional launch_reduce(const queue& q, range<1> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) template -std::optional launch_reduce(const queue& q, range<2> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) +std::optional launch_reduce(const queue& q, range<2> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) template -std::optional launch_reduce(const queue& q, range<3> r, const requirements& reqs, const KernelType& k, Reductions&&... 
reductions); (3) +std::optional launch_reduce(const queue& q, range<3> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) template -std::optional launch_reduce(const queue& q, range<1> r, const KernelType& k, Reductions&&... reductions); (4) +std::optional launch_reduce(const queue& q, range<1> r, const KernelType& k, Reductions&&... reductions); (4) template -std::optional launch_reduce(const queue& q, range<2> r, const KernelType& k, Reductions&&... reductions); (5) +std::optional launch_reduce(const queue& q, range<2> r, const KernelType& k, Reductions&&... reductions); (5) template -std::optional launch_reduce(const queue& q, range<3> r, const KernelType& k, Reductions&&... reductions); (6) +std::optional launch_reduce(const queue& q, range<3> r, const KernelType& k, Reductions&&... reductions); (6) } ---- @@ -402,22 +402,22 @@ reductions...);+#. namespace sycl::khr { template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k); (1) +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k); (1) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k); (2) +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k); (2) template -std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k); (3) +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k); (3) template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const KernelType& k); (4) +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const KernelType& k); (4) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const 
KernelType& k); (5) +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const KernelType& k); (5) template -std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const KernelType& k); (6) +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const KernelType& k); (6) } ---- @@ -446,22 +446,22 @@ k);#. namespace sycl::khr { template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const kernel& k, Args&&... args); (1) +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const kernel& k, Args&&... args); (1) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const kernel& k, Args&&... args); (2) +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const kernel& k, Args&&... args); (2) template -std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const kernel& k, Args&&... args); (3) +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const kernel& k, Args&&... args); (3) template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const kernel& k, Args&&... args); (4) +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const kernel& k, Args&&... args); (4) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (5) +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (5) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (6) +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const kernel& k, Args&&... args); (6) } ---- @@ -493,22 +493,22 @@ args...);+#.
namespace sycl::khr { template -std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) +std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) template -std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) +std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) template -std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) +std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) template -std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (4) +std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (4) template -std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const KernelType& k, Reductions&&... reductions); (5) +std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const KernelType& k, Reductions&&... reductions); (5) template -std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const KernelType& k, Reductions&&... reductions); (6) +std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const KernelType& k, Reductions&&... 
reductions); (6) } ---- @@ -540,10 +540,10 @@ _Effects_ (4-6): Equivalent to [code]#+return launch_grouped_reduce(q, r, size, namespace sycl::khr { template -std::optional launch_task(const queue& q, const requirements& reqs, const KernelType& k); (1) +std::optional launch_task(const queue& q, const requirements& reqs, const KernelType& k); (1) template -std::optional launch_task(const queue& q, const KernelType& k); (2) +std::optional launch_task(const queue& q, const KernelType& k); (2) } ---- @@ -572,10 +572,10 @@ _Effects_ (2): Equivalent to [code]#return launch_task(q, {}, k);#. namespace sycl::khr { template -std::optional launch_task(const queue& q, const requirements& reqs, const kernel& k, Args&&... args); (1) +std::optional launch_task(const queue& q, const requirements& reqs, const kernel& k, Args&&... args); (1) template -std::optional launch_task(const queue& q, const kernel& k, Args&&... args); (2) +std::optional launch_task(const queue& q, const kernel& k, Args&&... args); (2) } ---- @@ -606,7 +606,7 @@ _Effects_ (2): Equivalent to [code]#+return launch_task(q, {}, k, args...);+#. namespace sycl::khr { template -std::optional memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); +std::optional memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); } ---- @@ -631,7 +631,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); +std::optional copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); } ---- @@ -671,10 +671,10 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; namespace sycl::khr { template -std::optional copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) +std::optional copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) template -std::optional copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) +std::optional copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) } ---- @@ -714,10 +714,10 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) +std::optional copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) template -std::optional copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) +std::optional copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) } ---- @@ -757,7 +757,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); +std::optional copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); } ---- @@ -796,7 +796,7 @@ code if [code]#dest.get_count() < src.get_count()#. namespace sycl::khr { template -std::optional memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); +std::optional memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); } ---- @@ -820,10 +820,10 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; namespace sycl::khr { template -std::optional fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) +std::optional fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) template -std::optional fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) +std::optional fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) } ---- @@ -859,7 +859,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional update_host(const queue& q, accessor acc, const requirements& reqs = {}); +std::optional update_host(const queue& q, accessor acc, const requirements& reqs = {}); } ---- @@ -884,7 +884,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); +std::optional prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); } ---- @@ -908,7 +908,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); +std::optional mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); } ---- @@ -934,7 +934,7 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; namespace sycl::khr { template -std::optional command_barrier(const queue& q, const requirements& reqs = {}); +std::optional command_barrier(const queue& q, const requirements& reqs = {}); } ---- @@ -955,7 +955,7 @@ requirements, then this operation may be a no-op.{endnote} namespace sycl::khr { template -std::optional event_barrier(const queue& q, const requirements& reqs = {}); +std::optional event_barrier(const queue& q, const requirements& reqs = {}); } ---- From 307362110b9275a440f0038af0b49e6917fb4cfb Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 17 Jun 2025 20:37:09 +0100 Subject: [PATCH 50/73] Support only non-deprecated accessors --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 911d4b648..4d5c339f9 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -154,6 +154,8 @@ For each type, the [code]#is_requirement# type trait is specialized such that * [code]#accessor#: The command must not begin executing until the [code]#buffer# associated with the [code]#accessor# can be accessed in a manner compatible with the specified [code]#access_mode#. + The [code]#accessor# must have an [code]#AccessTarget# of + [code]#target::device# or [code]#target::host_task#. 
* [code]#tracking#: The command must be submitted such that its status can be tracked via an [code]#event# when the [code]#tracking# object is constructed From 2afc9f0be6be8b6fa60f476c1149d9a778bb11ec Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 17 Jun 2025 20:44:14 +0100 Subject: [PATCH 51/73] Add exposition-only paragraph to function synopses --- .../sycl_khr_free_function_commands.adoc | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 4d5c339f9..7c99b817a 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -193,9 +193,13 @@ object behaves as if it had only been specified once. .[apititle]#requirements::register-accessors# [source,role=synopsis,id=api:register-accessors] ---- -void register-accessors(handler& h) const; // exposition only +void register-accessors(handler& h) const; ---- +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + _Effects_: Calls [code]#h.require# for each [code]#accessor# passed to the constructor of this [code]#requirements# object. @@ -204,9 +208,13 @@ constructor of this [code]#requirements# object. .[apititle]#requirements::register-events# [source,role=synopsis,id=api:register-events] ---- -void register-events(handler& h) const; // exposition only +void register-events(handler& h) const; ---- +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + _Effects_: Calls [code]#h.depends_on# for each [code]#event# or [code]#std::vector# passed to the constructor of this [code]#requirements# object. 
@@ -216,9 +224,13 @@ _Effects_: Calls [code]#h.depends_on# for each [code]#event# or .[apititle]#requirements::register-kernel-bundle# [source,role=synopsis,id=api:register-kernel-bundle] ---- -void register-kernel-bundle(handler& h) const; // exposition only +void register-kernel-bundle(handler& h) const; ---- +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + _Effects_: Calls [code]#h.use_kernel_bundle# if a [code]#kernel_bundle# in executable state was passed to the constructor of this [code]#requirements# object and has no effect otherwise. @@ -228,9 +240,13 @@ object and has no effect otherwise. .[apititle]#requirements::has-tracking# [source,role=synopsis,id=api:has-tracking] ---- -bool has-tracking() const; // exposition only +bool has-tracking() const; ---- +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + _Returns_: [code]#true# if this [code]#requirements# object was constructed with a [code]#tracking# object with tracking enabled, and [code]#false# otherwise. From f31095a7144012e73f9930d303d3eb49def2b296 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 19 Jun 2025 09:45:18 +0100 Subject: [PATCH 52/73] Add constraints to limit kernel bundles to kernels Commands like copy, memcpy, fill, etc are not kernels and so passing a kernel_bundle as a requirement is not meaningful. 
--- .../sycl_khr_free_function_commands.adoc | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 7c99b817a..2ab7cab57 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -629,13 +629,14 @@ std::optional memcpy(const queue& q, void* dest, const void* src, size_t } ---- +_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. + _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.memcpy(dest, src, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -656,7 +657,10 @@ std::optional copy(const queue& q, const T* src, T* dest, size_t count, c Copies between two USM pointers. -_Constraints_: [code]#T# is <>. +_Constraints_: + +* [code]#T# is <>; and +* [code]#Requirements# does not contain a [code]#kernel_bundle#. _Preconditions_: @@ -675,7 +679,6 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.copy(src, dest, count); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -701,9 +704,10 @@ Copies from host to device. _Constraints_: -* [code]#SrcT# and [code]#DestT# are <>; and +* [code]#SrcT# and [code]#DestT# are <>; * [code]#DestMode# is [code]#access_mode::write# or - [code]#access_mode::read_write#. + [code]#access_mode::read_write#; and +* [code]#Requirements# does not contain a [code]#kernel_bundle#. _Preconditions_: @@ -717,7 +721,6 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.require(dest); h.copy(src, dest); }); @@ -744,9 +747,10 @@ Copies from device to host. 
_Constraints_: -* [code]#SrcT# and [code]#DestT# are <>; and +* [code]#SrcT# and [code]#DestT# are <>; * [code]#DestMode# is [code]#access_mode::read# or - [code]#access_mode::read_write#. + [code]#access_mode::read_write#; and +* [code]#Requirements# does not contain a [code]#kernel_bundle#. _Preconditions_: @@ -760,7 +764,6 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.require(src); h.copy(src, dest); }); @@ -786,9 +789,10 @@ _Constraints_: * [code]#SrcT# and [code]#DestT# are <>; * [code]#SrcMode# is [code]#access_mode::read# or - [code]#access_mode::read_write#; and + [code]#access_mode::read_write#; * [code]#DestMode# is [code]#access_mode::write# or - [code]#access_mode::read_write#. + [code]#access_mode::read_write#; and +* [code]#Requirements# does not contain a [code]#kernel_bundle#. _Effects_: Equivalent to: + [source,sycl] @@ -796,7 +800,6 @@ _Effects_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.require(src); h.require(dest); h.copy(src, dest); @@ -818,13 +821,15 @@ std::optional memset(const queue& q, void* ptr, int value, size_t numByte } ---- + +_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. + _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.memset(ptr, value, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -846,7 +851,14 @@ std::optional fill(const queue& q, accessor>. +_Constraints (1)_: + +* [code]#T# is <>; and +* [code]#Requirements# does not contain a [code]#kernel_bundle#. + +_Constraints (2)_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#. 
_Effects (1)_: Equivalent to: + [source,sycl] @@ -854,7 +866,6 @@ _Effects (1)_: Equivalent to: + event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.fill(ptr, pattern, count); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -864,6 +875,8 @@ _Effects (2)_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); h.fill(dest, src); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -881,13 +894,15 @@ std::optional update_host(const queue& q, accessor prefetch(const queue& q, void* ptr, size_t numBytes, const } ---- +_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. + _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.prefetch(ptr, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -930,13 +946,14 @@ std::optional mem_advise(const queue& q, void* ptr, size_t numBytes, int } ---- +_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. + _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); - reqs.register-kernel-bundle(h); h.mem_advise(ptr, numBytes, advice); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -956,6 +973,8 @@ std::optional command_barrier(const queue& q, const requirements event_barrier(const queue& q, const requirements Date: Thu, 19 Jun 2025 09:54:03 +0100 Subject: [PATCH 53/73] Add constraints to limit accessors to kernels Commands like copy, memcpy, fill, etc take their arguments explicitly rather than being captured by a function, and so there is no need to inform the runtime about which accessors are used. If a command uses an accessor, it must have been passed as an argument. 
--- .../sycl_khr_free_function_commands.adoc | 93 +++++++++++-------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 2ab7cab57..8416dae09 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -346,15 +346,16 @@ std::optional launch(const queue& q, range<3> r, const kernel& k, Args&&. } ---- -_Constraints_ (1-3): [code]#Requirements# does not contain a -[code]#kernel_bundle#. +_Constraints_ (1-3): + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Effects_ (1-3): Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.set_args(args...); h.parallel_for(r, k); }); @@ -483,15 +484,16 @@ std::optional launch_grouped(const queue& q, range<2> r, range<2> size, c } ---- -_Constraints_ (1-3): [code]#Requirements# does not contain a -[code]#kernel_bundle#. +_Constraints_ (1-3): + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Effects_ (1-3): Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.set_args(args...); h.parallel_for(nd_range(r, size), k); }); @@ -597,15 +599,16 @@ std::optional launch_task(const queue& q, const kernel& k, Args&&... args } ---- -_Constraints_ (1): [code]#Requirements# does not contain a -[code]#kernel_bundle#. +_Constraints_ (1): + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. 
_Effects_ (1): Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.set_args(args...); h.single_task(k); }); @@ -629,14 +632,16 @@ std::optional memcpy(const queue& q, void* dest, const void* src, size_t } ---- -_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.memcpy(dest, src, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -659,8 +664,9 @@ Copies between two USM pointers. _Constraints_: -* [code]#T# is <>; and -* [code]#Requirements# does not contain a [code]#kernel_bundle#. +* [code]#T# is <>; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Preconditions_: @@ -678,7 +684,6 @@ _Effects_: Equivalent to: + ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.copy(src, dest, count); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -706,8 +711,9 @@ _Constraints_: * [code]#SrcT# and [code]#DestT# are <>; * [code]#DestMode# is [code]#access_mode::write# or - [code]#access_mode::read_write#; and -* [code]#Requirements# does not contain a [code]#kernel_bundle#. + [code]#access_mode::read_write#; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. 
_Preconditions_: @@ -720,7 +726,6 @@ _Effects_: Equivalent to: + ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.require(dest); h.copy(src, dest); }); @@ -749,8 +754,9 @@ _Constraints_: * [code]#SrcT# and [code]#DestT# are <>; * [code]#DestMode# is [code]#access_mode::read# or - [code]#access_mode::read_write#; and -* [code]#Requirements# does not contain a [code]#kernel_bundle#. + [code]#access_mode::read_write#; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Preconditions_: @@ -763,7 +769,6 @@ _Effects_: Equivalent to: + ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.require(src); h.copy(src, dest); }); @@ -791,15 +796,15 @@ _Constraints_: * [code]#SrcMode# is [code]#access_mode::read# or [code]#access_mode::read_write#; * [code]#DestMode# is [code]#access_mode::write# or - [code]#access_mode::read_write#; and -* [code]#Requirements# does not contain a [code]#kernel_bundle#. + [code]#access_mode::read_write#; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.require(src); h.require(dest); h.copy(src, dest); @@ -822,14 +827,16 @@ std::optional memset(const queue& q, void* ptr, int value, size_t numByte } ---- -_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.memset(ptr, value, numBytes); }); return (reqs.has-tracking()) ? 
ev : std::nullopt; @@ -854,18 +861,19 @@ std::optional fill(const queue& q, accessor>; and -* [code]#Requirements# does not contain a [code]#kernel_bundle#. +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Constraints (2)_: -* [code]#Requirements# does not contain a [code]#kernel_bundle#. +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Effects (1)_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.fill(ptr, pattern, count); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -876,7 +884,6 @@ _Effects (2)_: Equivalent to: + ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.fill(dest, src); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -895,14 +902,16 @@ std::optional update_host(const queue& q, accessor prefetch(const queue& q, void* ptr, size_t numBytes, const } ---- -_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. _Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.prefetch(ptr, numBytes); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -946,14 +957,16 @@ std::optional mem_advise(const queue& q, void* ptr, size_t numBytes, int } ---- -_Constraints_: [code]#Requirements# does not contain a [code]#kernel_bundle#. +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. 
_Effects_: Equivalent to: + [source,sycl] ---- event ev = q.submit([&](handler& h) { reqs.register-events(h); - reqs.register-accessors(h); h.mem_advise(ptr, numBytes, advice); }); return (reqs.has-tracking()) ? ev : std::nullopt; @@ -973,7 +986,10 @@ std::optional command_barrier(const queue& q, const requirements event_barrier(const queue& q, const requirements Date: Thu, 19 Jun 2025 10:16:22 +0100 Subject: [PATCH 54/73] Add constraints to limit accessor targets Any accessor passed to a command that will run on the device must have target::device. --- .../sycl_khr_free_function_commands.adoc | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 8416dae09..cef27fcec 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -302,6 +302,9 @@ std::optional launch(const queue& q, range<3> r, const KernelType& k); } ---- +_Constraints_ (1-3): Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + _Effects_ (1-3): Equivalent to: + [source,sycl] ---- @@ -393,8 +396,15 @@ std::optional launch_reduce(const queue& q, range<3> r, const KernelType& } ---- -_Constraints_: The parameter pack consists of 1 or more objects created by the -[code]#reduction# function. +_Constraints_ (1-3): + +* The parameter pack consists of 1 or more objects created by the +[code]#reduction# function; and +* Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + +_Constraints_ (4-6): The parameter pack consists of 1 or more objects created by +the [code]#reduction# function. 
_Effects_ (1-3): Equivalent to: + [source,sycl] @@ -440,6 +450,10 @@ std::optional launch_grouped(const queue& q, range<3> r, range<3> size, c } ---- + +_Constraints_ (1-3): Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + _Effects_ (1-3): Equivalent to: + [source,sycl] ---- @@ -532,8 +546,15 @@ std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> } ---- -_Constraints_: The parameter pack consists of 1 or more objects created by the -[code]#reduction# function. +_Constraints_ (1-3): + +* The parameter pack consists of 1 or more objects created by the +[code]#reduction# function; and +* Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + +_Constraints_ (4-6): The parameter pack consists of 1 or more objects created by +the [code]#reduction# function. _Effects_ (1-3): Equivalent to: + [source,sycl] @@ -568,6 +589,9 @@ std::optional launch_task(const queue& q, const KernelType& k); } ---- +_Constraints_ (1): Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + _Effects_ (1): Equivalent to: + [source,sycl] ---- From 547c100762a06af6b7f687aded8454210ddbd0f5 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 18 Jul 2025 09:31:38 +0100 Subject: [PATCH 55/73] Use requirements in free function commands example --- adoc/extensions/sycl_khr_free_function_commands.adoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index cef27fcec..2498d89b3 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -75,9 +75,11 @@ int main() { khr::command_barrier(myQueue); // Launch an asynchronous kernel to compute matrix addition c = a + b. + // Require that the scheduler create an event to track completion. 
range<2> local = { 2, 2 }; range<2> global = { N, M }; - khr::launch_grouped(myQueue, global, local, [=](nd_item<2> it) { + auto reqs = khr::requirements(khr::tracking()); + auto ev = khr::launch_grouped(myQueue, global, local, reqs, [=](nd_item<2> it) { size_t i = it.get_global_id(0); size_t j = it.get_global_id(1); size_t index = i * M + j; @@ -86,7 +88,7 @@ int main() { // Wait for all three kernels to complete before accessing the results. // This blocks the host until all previous kernels have completed. - myQueue.wait(); + ev->wait(); std::cout << std::endl << "Result:" << std::endl; for (size_t i = 0; i < N; i++) { From 11ed357ac06409b5b2e35dc4680ff33becd45de5 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 15 Sep 2025 13:14:29 +0000 Subject: [PATCH 56/73] Add launch_host functions These functions are equivalent to the host task submission functions. --- .../sycl_khr_free_function_commands.adoc | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 2498d89b3..6b138abfc 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -561,7 +561,7 @@ the [code]#reduction# function. _Effects_ (1-3): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { +event ev = q.submit([&](handler& h) { reqs.register-events(h); reqs.register-accessors(h); reqs.register-kernel-bundle(h); @@ -645,6 +645,39 @@ _Effects_ (2): Equivalent to [code]#+return launch_task(q, {}, k, args...);+#. 
''' +=== Host tasks + +.[apititle]#launch_host# +[source,role=synopsis,id=api:launch_host] +---- +namespace sycl::khr { + +template +std::optional launch_host(const queue& q, const requirements& reqs, T&& hostTaskCallable); (1) + +template +std::optional launch_host(const queue& q, T&& hostTaskCallable); (2) + +} +---- + +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#. + +_Effects_ (1): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + h.host_task(hostTaskCallable); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (2): Equivalent to [code]#+return launch_host(q, {}, hostTaskCallable);+#. + +''' + === Memory operations .[apititle]#memcpy# From 17c66d952aeaeaec9aba800ed6bda6e12efc3ccd Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 13 Oct 2025 13:10:24 +0000 Subject: [PATCH 57/73] Fix formatting --- adoc/extensions/sycl_khr_free_function_commands.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 6b138abfc..1718031ab 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -674,7 +674,8 @@ event ev = q.submit([&](handler& h) { return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects_ (2): Equivalent to [code]#+return launch_host(q, {}, hostTaskCallable);+#. +_Effects_ (2): Equivalent to [code]#+return launch_host(q, {}, +hostTaskCallable);+#. 
''' From 0b30eb1d9a26edf8c2b3f054baccd365e8d5a599 Mon Sep 17 00:00:00 2001 From: Slawomir Ptak Date: Mon, 13 Oct 2025 14:54:32 +0200 Subject: [PATCH 58/73] Apply suggestion from @gmlueck Co-authored-by: Greg Lueck --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 1718031ab..f6f775e26 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1055,7 +1055,7 @@ _Effects_: Enqueues a command barrier. Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and -* All requirements in [code]#reqs# are satisfied. +* All events in [code]#reqs# have completed. {note}If a [code]#command_barrier# is submitted to an in-order queue with no requirements, then this operation may be a no-op.{endnote} From 166bd462dffffd4388a98aecd7c69b75d0373fbe Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 16 Oct 2025 13:09:25 +0000 Subject: [PATCH 59/73] Limit the accessor target for host tasks to target::host_task --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index f6f775e26..009f038df 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -663,7 +663,9 @@ std::optional launch_host(const queue& q, T&& hostTaskCallable); (2) _Constraints_: -* [code]#Requirements# does not contain a [code]#kernel_bundle#. +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* Any accessor in [code]#Requirements# must have a target of +[code]#target::host_task#. 
_Effects_ (1): Equivalent to: + [source,sycl] From 8f01389d8202e0a112ff38a37ab56c4642e1f29a Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 15 Jan 2026 12:53:59 +0000 Subject: [PATCH 60/73] Update the function names for memory operations. The function names for memory operations now follow the "enqueue_*" pattern, to indicate that these operations are added to the queue and not executed immediately. --- .../sycl_khr_free_function_commands.adoc | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 009f038df..1d196c424 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -683,13 +683,13 @@ hostTaskCallable);+#. === Memory operations -.[apititle]#memcpy# -[source,role=synopsis,id=api:memcpy] +.[apititle]#enqueue_memcpy# +[source,role=synopsis,id=api:enqueue_memcpy] ---- namespace sycl::khr { template -std::optional memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); +std::optional enqueue_memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); } ---- @@ -711,13 +711,13 @@ return (reqs.has-tracking()) ? ev : std::nullopt; ''' -.[apititle]#copy# (USM pointers) -[source,role=synopsis,id=api:copy-pointer] +.[apititle]#enqueue_copy# (USM pointers) +[source,role=synopsis,id=api:enqueue_copy-pointer] ---- namespace sycl::khr { template -std::optional copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); +std::optional enqueue_copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); } ---- @@ -753,16 +753,16 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; ''' -.[apititle]#copy# (accessors, host to device) -[source,role=synopsis,id=api:copy-accessor-h2d] +.[apititle]#enqueue_copy# (accessors, host to device) +[source,role=synopsis,id=api:enqueue_copy-accessor-h2d] ---- namespace sycl::khr { template -std::optional copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) +std::optional enqueue_copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) template -std::optional copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) +std::optional enqueue_copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) } ---- @@ -796,16 +796,16 @@ return (reqs.has-tracking()) ? ev : std::nullopt; ''' -.[apititle]#copy# (accessors, device to host) -[source,role=synopsis,id=api:copy-accessor-d2h] +.[apititle]#enqueue_copy# (accessors, device to host) +[source,role=synopsis,id=api:enqueue_copy-accessor-d2h] ---- namespace sycl::khr { template -std::optional copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) +std::optional enqueue_copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) template -std::optional copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) +std::optional enqueue_copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) } ---- @@ -839,13 +839,13 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; ''' -.[apititle]#copy# (accessors, device to device) -[source,role=synopsis,id=api:copy-accessor-d2d] +.[apititle]#enqueue_copy# (accessors, device to device) +[source,role=synopsis,id=api:enqueue_copy-accessor-d2d] ---- namespace sycl::khr { template -std::optional copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); +std::optional enqueue_copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); } ---- @@ -878,13 +878,13 @@ code if [code]#dest.get_count() < src.get_count()#. ''' -.[apititle]#memset# -[source,role=synopsis,id=api:memset] +.[apititle]#enqueue_memset# +[source,role=synopsis,id=api:enqueue_memset] ---- namespace sycl::khr { template -std::optional memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); +std::optional enqueue_memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); } ---- @@ -906,16 +906,16 @@ return (reqs.has-tracking()) ? ev : std::nullopt; ''' -.[apititle]#fill# -[source,role=synopsis,id=api:fill] +.[apititle]#enqueue_fill# +[source,role=synopsis,id=api:enqueue_fill] ---- namespace sycl::khr { template -std::optional fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) +std::optional enqueue_fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) template -std::optional fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) +std::optional enqueue_fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) } ---- @@ -953,13 +953,13 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; ''' -.[apititle]#update_host# -[source,role=synopsis,id=api:update_host] +.[apititle]#enqueue_update_host# +[source,role=synopsis,id=api:enqueue_update_host] ---- namespace sycl::khr { template -std::optional update_host(const queue& q, accessor acc, const requirements& reqs = {}); +std::optional enqueue_update_host(const queue& q, accessor acc, const requirements& reqs = {}); } ---- @@ -982,13 +982,13 @@ return (reqs.has-tracking()) ? ev : std::nullopt; ''' -.[apititle]#prefetch# -[source,role=synopsis,id=api:prefetch] +.[apititle]#enqueue_prefetch# +[source,role=synopsis,id=api:enqueue_prefetch] ---- namespace sycl::khr { template -std::optional prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); +std::optional enqueue_prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); } ---- @@ -1009,13 +1009,13 @@ return (reqs.has-tracking()) ? ev : std::nullopt; ''' -.[apititle]#mem_advise# -[source,role=synopsis,id=api:mem_advise] +.[apititle]#enqueue_mem_advise# +[source,role=synopsis,id=api:enqueue_mem_advise] ---- namespace sycl::khr { template -std::optional mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); +std::optional enqueue_mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); } ---- From 310247bb202d59302128de8a5b9e018d8a5148b2 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 20 Jan 2026 18:56:12 +0000 Subject: [PATCH 61/73] List of changes: - Changed the return type of the functions to void (signal_event should be used to track completion). - Added the signal_event, wait_event and wait_events structs to be used with the requirements object. - Added the following functions: make_event, enqueue_signal_event, enqueue_wait_event, enqueue_wait_events, enqueue_barrier. - Removed the following functions: command_barrier, event_barrier. - Updated the code example. 
--- .../sycl_khr_free_function_commands.adoc | 445 ++++++++++-------- 1 file changed, 254 insertions(+), 191 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 1d196c424..1d53b5e9e 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -2,7 +2,9 @@ This extension provides an alternative mechanism for submitting commands to a device via free-functions that require developers to opt-in to the handling of -requirements and the creation of [code]#event# objects. +requirements and the creation of [code]#event# objects. It also adds an ability +to reuse the same [code]#event# object to track completion of multiple command +submissions. The creation of [code]#event# objects may incur overheads that increase the latency of submitting commands to devices, even if the [code]#event# object is @@ -72,14 +74,15 @@ int main() { // Ensure that the two previous kernels complete before enqueueing more work. // This does not block the host, but enforces dependencies on the device. - khr::command_barrier(myQueue); + khr::enqueue_barrier(myQueue); // Launch an asynchronous kernel to compute matrix addition c = a + b. // Require that the scheduler create an event to track completion. range<2> local = { 2, 2 }; range<2> global = { N, M }; - auto reqs = khr::requirements(khr::tracking()); - auto ev = khr::launch_grouped(myQueue, global, local, reqs, [=](nd_item<2> it) { + event e = khr::make_event(myQueue.get_context()); + auto reqs = khr::requirements(khr::signal_event(e)); + khr::launch_grouped(myQueue, global, local, reqs, [=](nd_item<2> it) { size_t i = it.get_global_id(0); size_t j = it.get_global_id(1); size_t index = i * M + j; @@ -88,7 +91,7 @@ int main() { // Wait for all three kernels to complete before accessing the results. // This blocks the host until all previous kernels have completed. 
- ev->wait(); + e.wait(); std::cout << std::endl << "Result:" << std::endl; for (size_t i = 0; i < N; i++) { @@ -130,14 +133,24 @@ public: requirements(Requirements... values); void register-accessors(handler& h) const; // exposition only - void register-events(handler& h) const; // exposition only void register-kernel-bundle(handler& h) const; // exposition only - bool has-tracking() const; // exposition only + void register-wait-events(handler& h) const; // exposition only + void register-signal-event(handler& h) const; // exposition only }; -class tracking { -public: - tracking(bool enabled=true); +struct signal_event { + signal_event(const event& e); + event value; +}; + +struct wait_event { + wait_event(const event &e); + event value; +}; + +struct wait_events { + wait_events(const std::vector& evts); + std::vector value; }; } @@ -147,25 +160,23 @@ Each instance of a type listed below defines a specific scheduling requirement. For each type, the [code]#is_requirement# type trait is specialized such that [code]#is_requirement_v# returns [code]#true#. -* [code]#event#: The command must not begin executing until the event is - complete. - -* [code]#std::vector#: The command must not begin executing until all - events in the vector are complete. - * [code]#accessor#: The command must not begin executing until the [code]#buffer# associated with the [code]#accessor# can be accessed in a manner compatible with the specified [code]#access_mode#. The [code]#accessor# must have an [code]#AccessTarget# of [code]#target::device# or [code]#target::host_task#. -* [code]#tracking#: The command must be submitted such that its status can be - tracked via an [code]#event# when the [code]#tracking# object is constructed - with an [code]#enabled# value of [code]#true#. - * [code]#kernel_bundle#: The command must be submitted using a <> from the kernel bundle. +* [code]#wait_event#: The command must not begin executing until the event + is complete. 
+ +* [code]#wait_events#: The command must not begin executing until all + events in the vector are complete. + +* [code]#signal_event#: When the command is complete, signals the provided event. + ''' .[apititle]#Default constructor# @@ -180,8 +191,11 @@ _Constraints_: * [code]#is_requirement_v# returns [code]#true# for each type in [code]#Requirements#; * [code]#Requirements# contains at most one - [code]#kernel_bundle#; and -* [code]#Requirements# contains at most one [code]#tracking#. + [code]#kernel_bundle#; +* [code]#Requirements# contains at most one + [code]#wait_event# or [code]#wait_events#; and +* [code]#Requirements# contains at most one + [code]#signal_event#; _Effects_: Constructs a [code]#requirements# object representing the set of requirements specified via the [code]#values# parameter pack. @@ -207,22 +221,6 @@ constructor of this [code]#requirements# object. ''' -.[apititle]#requirements::register-events# -[source,role=synopsis,id=api:register-events] ----- -void register-events(handler& h) const; ----- - -This function is exposition only. -It is shown only to help specify the effect of the functions below under "New -free functions". - -_Effects_: Calls [code]#h.depends_on# for each [code]#event# or -[code]#std::vector# passed to the constructor of this -[code]#requirements# object. - -''' - .[apititle]#requirements::register-kernel-bundle# [source,role=synopsis,id=api:register-kernel-bundle] ---- @@ -239,38 +237,34 @@ object and has no effect otherwise. ''' -.[apititle]#requirements::has-tracking# -[source,role=synopsis,id=api:has-tracking] +.[apititle]#requirements::register-wait-events# +[source,role=synopsis,id=api:register-wait-events] ---- -bool has-tracking() const; +void register-wait-events(handler& h) const; ---- This function is exposition only. It is shown only to help specify the effect of the functions below under "New free functions". 
-_Returns_: [code]#true# if this [code]#requirements# object was constructed with -a [code]#tracking# object with tracking enabled, and [code]#false# otherwise. +_Effects_: Calls [code]#h.depends_on# for each [code]#event# (wrapped in wait_event +object) or each event in [code]#std::vector# (wrapped in wait_events object) +passed to the constructor of this [code]#requirements# object. ''' -.[apititle]#tracking# constructor -[source,role=synopsis,id=api:tracking-ctor] +.[apititle]#requirements::register-signal-event# +[source,role=synopsis,id=api:register-signal-event] ---- -namespace sycl::khr { - -tracking(bool enabled=true); - -} +void register-signal-event(handler& h) const; ---- -_Effects_: Construct a [code]#tracking# object, representing a requirement that -a command must be submitted such that its state can be tracked via an -[code]#event# when [code]#enabled# is [code]#true#. +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". -{note}If an [code]#event# is _not_ required, [code]#tracking(false)# should be -expected to introduce a small amount of overhead compared to providing no -[code]#tracking# requirement.{endnote} +_Effects_: Registers an event (wrapped in signal_event object) to be signaled once +the command is complete. 
== New free functions @@ -284,22 +278,22 @@ expected to introduce a small amount of overhead compared to providing no namespace sycl::khr { template -std::optional launch(const queue& q, range<1> r, const requirements& reqs, const KernelType& k); (1) +void launch(const queue& q, range<1> r, const requirements& reqs, const KernelType& k); (1) template -std::optional launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2) +void launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2) template -std::optional launch(const queue& q, range<3> r, const requirements& reqs, const KernelType& k); (3) +void launch(const queue& q, range<3> r, const requirements& reqs, const KernelType& k); (3) template -std::optional launch(const queue& q, range<1> r, const KernelType& k); (4) +void launch(const queue& q, range<1> r, const KernelType& k); (4) template -std::optional launch(const queue& q, range<2> r, const KernelType& k); (5) +void launch(const queue& q, range<2> r, const KernelType& k); (5) template -std::optional launch(const queue& q, range<3> r, const KernelType& k); (6) +void launch(const queue& q, range<3> r, const KernelType& k); (6) } ---- @@ -310,16 +304,16 @@ _Constraints_ (1-3): Any accessor in [code]#Requirements# must have a target of _Effects_ (1-3): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { reqs.register-accessors(h); reqs.register-kernel-bundle(h); + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.parallel_for(r, k); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects_ (4-6): Equivalent to: [code]#return launch(q, r, {}, k);#. +_Effects_ (4-6): Equivalent to: [code]#launch(q, r, {}, k);#. ''' @@ -331,22 +325,22 @@ _Effects_ (4-6): Equivalent to: [code]#return launch(q, r, {}, k);#. 
namespace sycl::khr { template -std::optional launch(const queue& q, range<1> r, const requirements& reqs, const kernel& k, Args&&... args); (1) +void launch(const queue& q, range<1> r, const requirements& reqs, const kernel& k, Args&&... args); (1) template -std::optional launch(const queue& q, range<2> r, const requirements& reqs, const kernel& k, Args&&... args); (2) +void launch(const queue& q, range<2> r, const requirements& reqs, const kernel& k, Args&&... args); (2) template -std::optional launch(const queue& q, range<3> r, const requirements& reqs, const kernel& k, Args&&... args); (3) +void launch(const queue& q, range<3> r, const requirements& reqs, const kernel& k, Args&&... args); (3) template -std::optional launch(const queue& q, range<1> r, const kernel& k, Args&&... args); (4) +void launch(const queue& q, range<1> r, const kernel& k, Args&&... args); (4) template -std::optional launch(const queue& q, range<2> r, const kernel& k, Args&&... args); (5) +void launch(const queue& q, range<2> r, const kernel& k, Args&&... args); (5) template -std::optional launch(const queue& q, range<3> r, const kernel& k, Args&&... args); (6) +void launch(const queue& q, range<3> r, const kernel& k, Args&&... args); (6) } ---- @@ -359,15 +353,15 @@ _Constraints_ (1-3): _Effects_ (1-3): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.set_args(args...); h.parallel_for(r, k); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects_ (4-6): Equivalent to: [code]#+return launch(q, r, {}, k, args...);+#. +_Effects_ (4-6): Equivalent to: [code]#+launch(q, r, {}, k, args...);+#. ''' @@ -379,22 +373,22 @@ _Effects_ (4-6): Equivalent to: [code]#+return launch(q, r, {}, k, args...);+#. 
namespace sycl::khr { template -std::optional launch_reduce(const queue& q, range<1> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) +void launch_reduce(const queue& q, range<1> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) template -std::optional launch_reduce(const queue& q, range<2> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) +void launch_reduce(const queue& q, range<2> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) template -std::optional launch_reduce(const queue& q, range<3> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) +void launch_reduce(const queue& q, range<3> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) template -std::optional launch_reduce(const queue& q, range<1> r, const KernelType& k, Reductions&&... reductions); (4) +void launch_reduce(const queue& q, range<1> r, const KernelType& k, Reductions&&... reductions); (4) template -std::optional launch_reduce(const queue& q, range<2> r, const KernelType& k, Reductions&&... reductions); (5) +void launch_reduce(const queue& q, range<2> r, const KernelType& k, Reductions&&... reductions); (5) template -std::optional launch_reduce(const queue& q, range<3> r, const KernelType& k, Reductions&&... reductions); (6) +void launch_reduce(const queue& q, range<3> r, const KernelType& k, Reductions&&... reductions); (6) } ---- @@ -411,16 +405,16 @@ the [code]#reduction# function. _Effects_ (1-3): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { reqs.register-accessors(h); reqs.register-kernel-bundle(h); + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.parallel_for(r, reductions..., k); }); -return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- -_Effects_ (4-6): Equivalent to [code]#+return launch_reduce(q, r, {}, +_Effects_ (4-6): Equivalent to [code]#+launch_reduce(q, r, {}, k, reductions...);+#. ''' @@ -433,22 +427,22 @@ reductions...);+#. namespace sycl::khr { template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k); (1) +void launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k); (1) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k); (2) +void launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k); (2) template -std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k); (3) +void launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k); (3) template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const KernelType& k); (4) +void launch_grouped(const queue& q, range<1> r, range<1> size, const KernelType& k); (4) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const KernelType& k); (5) +void launch_grouped(const queue& q, range<2> r, range<2> size, const KernelType& k); (5) template -std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const KernelType& k); (6) +void launch_grouped(const queue& q, range<3> r, range<3> size, const KernelType& k); (6) } ---- @@ -459,16 +453,16 @@ _Constraints_ (1-3): Any accessor in [code]#Requirements# must have a target of _Effects_ (1-3): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { reqs.register-accessors(h); reqs.register-kernel-bundle(h); + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.parallel_for(nd_range(r, 
size), k); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects_ (4-6): Equivalent to [code]#return launch_grouped(q, r, size, {}, +_Effects_ (4-6): Equivalent to [code]#launch_grouped(q, r, size, {}, k);#. ''' @@ -481,22 +475,22 @@ k);#. namespace sycl::khr { template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const kernel& k, Args&&... args); (1) +void launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const kernel& k, Args&&... args); (1) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const kernel& k, Args&&... args); (2) +void launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const kernel& k, Args&&... args); (2) template -std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const kernel& k, Args&&... args); (3) +void launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const kernel& k, Args&&... args); (3) template -std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const kernel& k, Args&&... args); (4) +void launch_grouped(const queue& q, range<1> r, range<1> size, const kernel& k, Args&&... args); (4) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (5) +void launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (5) template -std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (6) +void launch_grouped(const queue& q, range<3> r, range<3> size, const kernel& k, Args&&... 
args); (6) } ---- @@ -508,15 +502,15 @@ _Constraints_ (1-3): _Effects_ (1-3): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.set_args(args...); h.parallel_for(nd_range(r, size), k); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects_ (4-6): Equivalent to: [code]#+return launch_grouped(q, r, size, {}, k, +_Effects_ (4-6): Equivalent to: [code]#+launch_grouped(q, r, size, {}, k, args...);+#. ''' @@ -529,22 +523,22 @@ args...);+#. namespace sycl::khr { template -std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) +void launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) template -std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) +void launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) template -std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) +void launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) template -std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (4) +void launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (4) template -std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const KernelType& k, Reductions&&... 
reductions); (5) +void launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const KernelType& k, Reductions&&... reductions); (5) template -std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const KernelType& k, Reductions&&... reductions); (6) +void launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const KernelType& k, Reductions&&... reductions); (6) } ---- @@ -561,16 +555,16 @@ the [code]#reduction# function. _Effects_ (1-3): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { reqs.register-accessors(h); reqs.register-kernel-bundle(h); + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.parallel_for(nd_range(r, size), reductions..., k); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects_ (4-6): Equivalent to [code]#+return launch_grouped_reduce(q, r, size, +_Effects_ (4-6): Equivalent to [code]#+launch_grouped_reduce(q, r, size, {}, k, reductions...);+#. ''' @@ -583,10 +577,10 @@ _Effects_ (4-6): Equivalent to [code]#+return launch_grouped_reduce(q, r, size, namespace sycl::khr { template -std::optional launch_task(const queue& q, const requirements& reqs, const KernelType& k); (1) +void launch_task(const queue& q, const requirements& reqs, const KernelType& k); (1) template -std::optional launch_task(const queue& q, const KernelType& k); (2) +void launch_task(const queue& q, const KernelType& k); (2) } ---- @@ -597,16 +591,16 @@ _Constraints_ (1): Any accessor in [code]#Requirements# must have a target of _Effects_ (1): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { reqs.register-accessors(h); reqs.register-kernel-bundle(h); + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.single_task(k); }); -return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- -_Effects_ (2): Equivalent to [code]#return launch_task(q, {}, k);#. +_Effects_ (2): Equivalent to [code]#launch_task(q, {}, k);#. ''' @@ -618,10 +612,10 @@ _Effects_ (2): Equivalent to [code]#return launch_task(q, {}, k);#. namespace sycl::khr { template -std::optional launch_task(const queue& q, const requirements& reqs, const kernel& k, Args&&... args); (1) +void launch_task(const queue& q, const requirements& reqs, const kernel& k, Args&&... args); (1) template -std::optional launch_task(const queue& q, const kernel& k, Args&&... args); (2) +void launch_task(const queue& q, const kernel& k, Args&&... args); (2) } ---- @@ -633,15 +627,15 @@ _Constraints_ (1): _Effects_ (1): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.set_args(args...); h.single_task(k); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- -_Effects_ (2): Equivalent to [code]#+return launch_task(q, {}, k, args...);+#. +_Effects_ (2): Equivalent to [code]#+launch_task(q, {}, k, args...);+#. ''' @@ -653,10 +647,10 @@ _Effects_ (2): Equivalent to [code]#+return launch_task(q, {}, k, args...);+#. namespace sycl::khr { template -std::optional launch_host(const queue& q, const requirements& reqs, T&& hostTaskCallable); (1) +void launch_host(const queue& q, const requirements& reqs, T&& hostTaskCallable); (1) template -std::optional launch_host(const queue& q, T&& hostTaskCallable); (2) +void launch_host(const queue& q, T&& hostTaskCallable); (2) } ---- @@ -670,13 +664,14 @@ _Constraints_: _Effects_ (1): Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.host_task(hostTaskCallable); }); -return (reqs.has-tracking()) ? 
ev : std::nullopt; ---- -_Effects_ (2): Equivalent to [code]#+return launch_host(q, {}, +_Effects_ (2): Equivalent to [code]#+launch_host(q, {}, hostTaskCallable);+#. ''' @@ -689,7 +684,7 @@ hostTaskCallable);+#. namespace sycl::khr { template -std::optional enqueue_memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); +void enqueue_memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); } ---- @@ -702,11 +697,11 @@ _Constraints_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.memcpy(dest, src, numBytes); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -717,7 +712,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); +void enqueue_copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); } ---- @@ -744,11 +739,11 @@ _Preconditions_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.copy(src, dest, count); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -759,10 +754,10 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) +void enqueue_copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) template -std::optional enqueue_copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) +void enqueue_copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) } ---- @@ -786,12 +781,12 @@ _Preconditions_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.require(dest); h.copy(src, dest); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -802,10 +797,10 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) +void enqueue_copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) template -std::optional enqueue_copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) +void enqueue_copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) } ---- @@ -829,12 +824,12 @@ _Preconditions_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.require(src); h.copy(src, dest); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -845,7 +840,7 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); +void enqueue_copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); } ---- @@ -865,8 +860,9 @@ _Constraints_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.require(src); h.require(dest); h.copy(src, dest); @@ -884,7 +880,7 @@ code if [code]#dest.get_count() < src.get_count()#. namespace sycl::khr { template -std::optional enqueue_memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); +void enqueue_memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); } ---- @@ -897,11 +893,11 @@ _Constraints_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.memset(ptr, value, numBytes); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -912,10 +908,10 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) +void enqueue_fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) template -std::optional enqueue_fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) +void enqueue_fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) } ---- @@ -934,21 +930,21 @@ _Constraints (2)_: _Effects (1)_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.fill(ptr, pattern, count); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- _Effects (2)_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.fill(dest, src); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -959,7 +955,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_update_host(const queue& q, accessor acc, const requirements& reqs = {}); +void enqueue_update_host(const queue& q, accessor acc, const requirements& reqs = {}); } ---- @@ -972,12 +968,12 @@ _Constraints_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.require(acc); h.update_host(acc); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -988,7 +984,7 @@ return (reqs.has-tracking()) ? 
ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); +void enqueue_prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); } ---- @@ -1000,11 +996,11 @@ _Constraints_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.prefetch(ptr, numBytes); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' @@ -1015,7 +1011,7 @@ return (reqs.has-tracking()) ? ev : std::nullopt; namespace sycl::khr { template -std::optional enqueue_mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); +void enqueue_mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); } ---- @@ -1027,24 +1023,24 @@ _Constraints_: _Effects_: Equivalent to: + [source,sycl] ---- -event ev = q.submit([&](handler& h) { - reqs.register-events(h); +q.submit([&](handler& h) { + reqs.register-wait-events(h); + reqs.register-signal-event(h); h.mem_advise(ptr, numBytes, advice); }); -return (reqs.has-tracking()) ? ev : std::nullopt; ---- ''' === Command and event barriers -.[apititle]#command_barrier# -[source,role=synopsis,id=api:command_barrier] +.[apititle]#enqueue_barrier# +[source,role=synopsis,id=api:enqueue_barrier] ---- namespace sycl::khr { template -std::optional command_barrier(const queue& q, const requirements& reqs = {}); +void enqueue_barrier(const queue& q, const requirements& reqs = {}); } ---- @@ -1057,20 +1053,21 @@ _Effects_: Enqueues a command barrier. Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and -* All events in [code]#reqs# have completed. 
+* All events (wrapped in wait_event or wait_events) in [code]#reqs# have completed. -{note}If a [code]#command_barrier# is submitted to an in-order queue with no -requirements, then this operation may be a no-op.{endnote} +{note}If a barrier is submitted to an in-order queue with no +requirements, then this operation may be a no-op. If an event wrapped in signal_event +was provided in [code]#reqs#, it will be signaled.{endnote} ''' -.[apititle]#event_barrier# -[source,role=synopsis,id=api:event_barrier] +.[apititle]#enqueue_signal_event# +[source,role=synopsis,id=api:enqueue_signal_event] ---- namespace sycl::khr { template -std::optional event_barrier(const queue& q, const requirements& reqs = {}); +void enqueue_signal_event(const queue& q, const event& e, const requirements& reqs = {}); } ---- @@ -1079,11 +1076,77 @@ _Constraints_: * [code]#Requirements# does not contain a [code]#kernel_bundle#; and * [code]#Requirements# does not contain any accessors. +_Effects_: Enqueues a command barrier, signals the provided event. +Any commands submitted after this barrier cannot begin execution until: + +* All commands previously submitted to this queue have completed; and +* All events (wrapped in wait_event or wait_events) in [code]#reqs# have completed. + +Once the above conditions are met, signals the provided event ([code]#e#). + +{note}The event is disassociated from any previous command it was tracking.{endnote} + +''' + +.[apititle]#enqueue_wait_event# +[source,role=synopsis,id=api:enqueue_wait_event] +---- +namespace sycl::khr { + +template +void enqueue_wait_event(const queue& q, const event& e, const requirements& reqs = {}); + +} +---- +_Constraints_: + +* [code]#Requirements# contains the [code]#signal_event# object; or +* [code]#Requirements# is empty. + _Effects_: Enqueues an event barrier. -Any commands submitted after this barrier cannot begin execution until all -requirements in [code]#reqs# are satisfied. 
+Any commands submitted after this barrier cannot begin execution until the provided event ([code]#e#) +has completed. + +Once the above conditions are met, signals the event (wrapped in signal_event), if provided. + +''' -{note}If an [code]#event_barrier# is submitted with no requirements, then this -operation may be a no-op.{endnote} +.[apititle]#enqueue_wait_events# +[source,role=synopsis,id=api:enqueue_wait_events] +---- +namespace sycl::khr { + +template +void enqueue_wait_events(const queue& q, const std::vector& evts, const requirements& reqs = {}); (1) + +template +void enqueue_wait_events(const queue& q, const std::span& evts, const requirements& reqs = {}); // Requires C++20 (2) + +} +---- +_Constraints_ (1-2): + +* [code]#Requirements# contains the [code]#signal_event# object; or +* [code]#Requirements# is empty. + +_Effects_ (1-2): Enqueues an event barrier. +Any commands submitted after this barrier cannot begin execution until the provided events ([code]#evts#) have completed. + +Once the above conditions are met, signals the event (wrapped in signal_event), if provided. + +''' + +=== Event-related helper functions + +.[apititle]#make_event# +[source,role=synopsis,id=api:make_event] +---- +namespace sycl::khr { + +event make_event(context c); + +} +---- +_Effects_: Creates an event associated with the given context. The event can then be used with the [code]#enqueue_signal_event# function, with a [code]#signal_event# object, and as a dependency for other commands. 
''' From 1e0506ab914bfe813fe5e8a16ad975d5c8ab8053 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Wed, 21 Jan 2026 11:45:30 +0000 Subject: [PATCH 62/73] Fix formatting --- .../sycl_khr_free_function_commands.adoc | 75 ++++++++++--------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 1d53b5e9e..e865d2ce0 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -2,9 +2,9 @@ This extension provides an alternative mechanism for submitting commands to a device via free-functions that require developers to opt-in to the handling of -requirements and the creation of [code]#event# objects. It also adds an ability -to reuse the same [code]#event# object to track completion of multiple command -submissions. +requirements and the creation of [code]#event# objects. +It also adds an ability to reuse the same [code]#event# object to track +completion of multiple command submissions. The creation of [code]#event# objects may incur overheads that increase the latency of submitting commands to devices, even if the [code]#event# object is @@ -169,13 +169,14 @@ For each type, the [code]#is_requirement# type trait is specialized such that * [code]#kernel_bundle#: The command must be submitted using a <> from the kernel bundle. -* [code]#wait_event#: The command must not begin executing until the event - is complete. +* [code]#wait_event#: The command must not begin executing until the event is + complete. -* [code]#wait_events#: The command must not begin executing until all - events in the vector are complete. +* [code]#wait_events#: The command must not begin executing until all events in + the vector are complete. -* [code]#signal_event#: When the command is complete, signals the provided event. 
+* [code]#signal_event#: When the command is complete, signals the provided + event. ''' @@ -192,10 +193,9 @@ _Constraints_: [code]#Requirements#; * [code]#Requirements# contains at most one [code]#kernel_bundle#; -* [code]#Requirements# contains at most one - [code]#wait_event# or [code]#wait_events#; and -* [code]#Requirements# contains at most one - [code]#signal_event#; +* [code]#Requirements# contains at most one [code]#wait_event# or + [code]#wait_events#; and +* [code]#Requirements# contains at most one [code]#signal_event#; _Effects_: Constructs a [code]#requirements# object representing the set of requirements specified via the [code]#values# parameter pack. @@ -247,9 +247,10 @@ This function is exposition only. It is shown only to help specify the effect of the functions below under "New free functions". -_Effects_: Calls [code]#h.depends_on# for each [code]#event# (wrapped in wait_event -object) or each event in [code]#std::vector# (wrapped in wait_events object) -passed to the constructor of this [code]#requirements# object. +_Effects_: Calls [code]#h.depends_on# for each [code]#event# (wrapped in +wait_event object) or each event in [code]#std::vector# (wrapped in +wait_events object) passed to the constructor of this [code]#requirements# +object. ''' @@ -263,8 +264,8 @@ This function is exposition only. It is shown only to help specify the effect of the functions below under "New free functions". -_Effects_: Registers an event (wrapped in signal_event object) to be signaled once -the command is complete. +_Effects_: Registers an event (wrapped in signal_event object) to be signaled +once the command is complete. == New free functions @@ -462,8 +463,7 @@ q.submit([&](handler& h) { }); ---- -_Effects_ (4-6): Equivalent to [code]#launch_grouped(q, r, size, {}, -k);#. +_Effects_ (4-6): Equivalent to [code]#launch_grouped(q, r, size, {}, k);#. 
''' @@ -564,8 +564,8 @@ q.submit([&](handler& h) { }); ---- -_Effects_ (4-6): Equivalent to [code]#+launch_grouped_reduce(q, r, size, -{}, k, reductions...);+#. +_Effects_ (4-6): Equivalent to [code]#+launch_grouped_reduce(q, r, size, {}, k, +reductions...);+#. ''' @@ -671,8 +671,7 @@ q.submit([&](handler& h) { }); ---- -_Effects_ (2): Equivalent to [code]#+launch_host(q, {}, -hostTaskCallable);+#. +_Effects_ (2): Equivalent to [code]#+launch_host(q, {}, hostTaskCallable);+#. ''' @@ -1055,9 +1054,10 @@ Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and * All events (wrapped in wait_event or wait_events) in [code]#reqs# have completed. -{note}If a barrier is submitted to an in-order queue with no -requirements, then this operation may be a no-op. If an event wrapped in signal_event -was provided in [code]#reqs#, it will be signaled.{endnote} +{note}If a barrier is submitted to an in-order queue with no requirements, then +this operation may be a no-op. +If an event wrapped in signal_event was provided in [code]#reqs#, it will be +signaled.{endnote} ''' @@ -1080,11 +1080,13 @@ _Effects_: Enqueues a command barrier, signals the provided event. Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and -* All events (wrapped in wait_event or wait_events) in [code]#reqs# have completed. +* All events (wrapped in wait_event or wait_events) in [code]#reqs# have + completed. Once the above conditions are met, signals the provided event ([code]#e#). -{note}The event is disassociated from any previous command it was tracking.{endnote} +{note}The event is disassociated from any previous command it was +tracking.{endnote} ''' @@ -1104,10 +1106,11 @@ _Constraints_: * [code]#Requirements# is empty. _Effects_: Enqueues an event barrier. 
-Any commands submitted after this barrier cannot begin execution until the provided event ([code]#e#) -has completed. +Any commands submitted after this barrier cannot begin execution until the +provided event ([code]#e#) has completed. -Once the above conditions are met, signals the event (wrapped in signal_event), if provided. +Once the above conditions are met, signals the event (wrapped in signal_event), +if provided. ''' @@ -1130,9 +1133,11 @@ _Constraints_ (1-2): * [code]#Requirements# is empty. _Effects_ (1-2): Enqueues an event barrier. -Any commands submitted after this barrier cannot begin execution until the provided events ([code]#evts#) have completed. +Any commands submitted after this barrier cannot begin execution until the +provided events ([code]#evts#) have completed. -Once the above conditions are met, signals the event (wrapped in signal_event), if provided. +Once the above conditions are met, signals the event (wrapped in signal_event), +if provided. ''' @@ -1147,6 +1152,8 @@ event make_event(context c); } ---- -_Effects_: Creates an event associated with a given context. The event can then be used with [code]#enqueue_signal_event# function, with signal_event object and as a dependency for other commands. +_Effects_: Creates an event associated with a given context. +The event can then be used with [code]#enqueue_signal_event# function, with +signal_event object and as a dependency for other commands. 
''' From 00536113b39058655585c7a0709be9bf239942d3 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Wed, 21 Jan 2026 11:55:28 +0000 Subject: [PATCH 63/73] Fix formatting --- adoc/extensions/sycl_khr_free_function_commands.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index e865d2ce0..4c2eb2de9 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1052,7 +1052,8 @@ _Effects_: Enqueues a command barrier. Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and -* All events (wrapped in wait_event or wait_events) in [code]#reqs# have completed. +* All events (wrapped in wait_event or wait_events) in [code]#reqs# have + completed. {note}If a barrier is submitted to an in-order queue with no requirements, then this operation may be a no-op. From 94262f856e7e20d46baa2bdd5e77973d69d6cab3 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Wed, 21 Jan 2026 11:59:37 +0000 Subject: [PATCH 64/73] Add a host task function restriction, so it cannot make any SYCL API calls. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 4c2eb2de9..7345cc8ff 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -657,9 +657,10 @@ void launch_host(const queue& q, T&& hostTaskCallable); (2) _Constraints_: -* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain a [code]#kernel_bundle#; * Any accessor in [code]#Requirements# must have a target of -[code]#target::host_task#. 
+[code]#target::host_task#; and +* The host function does not make any SYCL API calls. _Effects_ (1): Equivalent to: + [source,sycl] From 3064bc137aaf7cd3762b8c454927c942e9258ed1 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 3 Feb 2026 11:28:04 +0000 Subject: [PATCH 65/73] Clarified the constraints for the requirements object, for enqueue_signal_event function. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 7345cc8ff..926a5f6d6 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1075,8 +1075,9 @@ void enqueue_signal_event(const queue& q, const event& e, const requirements Date: Thu, 5 Feb 2026 08:57:31 +0100 Subject: [PATCH 66/73] Update adoc/extensions/sycl_khr_free_function_commands.adoc Co-authored-by: Greg Lueck --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 926a5f6d6..f66d32d86 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -91,7 +91,7 @@ int main() { // Wait for all three kernels to complete before accessing the results. // This blocks the host until all previous kernels have completed. 
- e->wait(); + e.wait(); std::cout << std::endl << "Result:" << std::endl; for (size_t i = 0; i < N; i++) { From 14355fda844b46f56904855f863c9d4303c2fe31 Mon Sep 17 00:00:00 2001 From: Slawomir Ptak Date: Thu, 5 Feb 2026 08:58:34 +0100 Subject: [PATCH 67/73] Update adoc/extensions/sycl_khr_free_function_commands.adoc Co-authored-by: Greg Lueck --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index f66d32d86..b087ef076 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -282,7 +282,7 @@ template void launch(const queue& q, range<1> r, const requirements& reqs, const KernelType& k); (1) template -void launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2)void +void launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2) template void launch(const queue& q, range<3> r, const requirements& reqs, const KernelType& k); (3) From 819692684d724e6ddb097844c552d136372649bd Mon Sep 17 00:00:00 2001 From: Slawomir Ptak Date: Thu, 5 Feb 2026 09:50:59 +0100 Subject: [PATCH 68/73] Update adoc/extensions/sycl_khr_free_function_commands.adoc Co-authored-by: Greg Lueck --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index b087ef076..1481ed930 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1151,7 +1151,7 @@ if provided. 
---- namespace sycl::khr { -event make_event(context c); +event make_event(const context& c); } ---- From 8b3f92afb1d422831cd8756dc3a273d1b15ee422 Mon Sep 17 00:00:00 2001 From: Slawomir Ptak Date: Thu, 5 Feb 2026 09:52:03 +0100 Subject: [PATCH 69/73] Update adoc/extensions/sycl_khr_free_function_commands.adoc Co-authored-by: Greg Lueck --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 1481ed930..8d64fb573 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -649,7 +649,7 @@ namespace sycl::khr { template void launch_host(const queue& q, const requirements& reqs, T&& hostTaskCallable); (1) -template +template void launch_host(const queue& q, T&& hostTaskCallable); (2) } From dbb35ef9aa25b43e6cf28074df4fdd415487e0de Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 5 Feb 2026 13:58:05 +0000 Subject: [PATCH 70/73] Address the review feedback, and some minor changes. --- .../sycl_khr_free_function_commands.adoc | 80 +++++++++++++------ 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 8d64fb573..28e79bac0 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -175,8 +175,9 @@ For each type, the [code]#is_requirement# type trait is specialized such that * [code]#wait_events#: The command must not begin executing until all events in the vector are complete. -* [code]#signal_event#: When the command is complete, signals the provided - event. +* [code]#signal_event#: When the command is submitted, the event is immediately disassociated + with any previous command, and its status is set to "submitted". 
When the command is complete, + the provided event is signaled. ''' @@ -659,8 +660,10 @@ _Constraints_: * [code]#Requirements# does not contain a [code]#kernel_bundle#; * Any accessor in [code]#Requirements# must have a target of -[code]#target::host_task#; and -* The host function does not make any SYCL API calls. +[code]#target::host_task#. + +_Preconditions_: The callable function [code]#hostTaskCallable# does not call any function +in the [code]#sycl# namespace. _Effects_ (1): Equivalent to: + [source,sycl] @@ -1050,16 +1053,19 @@ _Constraints_: * [code]#Requirements# does not contain any accessors. _Effects_: Enqueues a command barrier. -Any commands submitted after this barrier cannot begin execution until: +Any commands submitted after this barrier cannot begin execution until the barrier +completes. +The barrier completes when: * All commands previously submitted to this queue have completed; and -* All events (wrapped in wait_event or wait_events) in [code]#reqs# have - completed. +* All events (wrapped in [code]#wait_event# or [code]#wait_events#) in +[code]#reqs# have completed. + +if [code]#reqs# contains a [code]#signal_event# requirement, the event is signaled +when the barrier completes. {note}If a barrier is submitted to an in-order queue with no requirements, then -this operation may be a no-op. -If an event wrapped in signal_event was provided in [code]#reqs#, it will be -signaled.{endnote} +this operation may be a no-op.{endnote} ''' @@ -1069,7 +1075,7 @@ signaled.{endnote} namespace sycl::khr { template -void enqueue_signal_event(const queue& q, const event& e, const requirements& reqs = {}); +void enqueue_signal_event(const queue& q, event& e, const requirements& reqs = {}); } ---- @@ -1079,17 +1085,20 @@ _Constraints_: * [code]#Requirements# contains the [code]#wait_events# object; or * [code]#Requirements# is empty. -_Effects_: Enqueues a command barrier, signals the provided event. 
-Any commands submitted after this barrier cannot begin execution until: +_Effects_: Enqueues a command barrier. The event [code]#e# is immediately disassociated +with any previous command, and its status is set to "submitted". +Any commands submitted after this barrier cannot begin execution until the barrier +completes. +The barrier completes when: * All commands previously submitted to this queue have completed; and -* All events (wrapped in wait_event or wait_events) in [code]#reqs# have - completed. +* All events (wrapped in [code]#wait_event# or [code]#wait_events#) in + [code]#reqs# have completed. -Once the above conditions are met, signals the provided event ([code]#e#). +Once the above conditions are met, the provided event [code]#e# is signaled. -{note}The event is disassociated from any previous command it was -tracking.{endnote} +_Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error +code if [code]#e# and [code]#q# don't have the same context. ''' @@ -1112,7 +1121,7 @@ _Effects_: Enqueues an event barrier. Any commands submitted after this barrier cannot begin execution until the provided event ([code]#e#) has completed. -Once the above conditions are met, signals the event (wrapped in signal_event), +Once the above conditions are met, the event (wrapped in signal_event) is signaled, if provided. ''' @@ -1139,7 +1148,7 @@ _Effects_ (1-2): Enqueues an event barrier. Any commands submitted after this barrier cannot begin execution until the provided events ([code]#evts#) have completed. -Once the above conditions are met, signals the event (wrapped in signal_event), +Once the above conditions are met, the event (wrapped in [code]#signal_event#) is signaled, if provided. ''' @@ -1155,8 +1164,31 @@ event make_event(const context& c); } ---- -_Effects_: Creates an event associated with a given context. 
-The event can then be used with [code]#enqueue_signal_event# function, with -signal_event object and as a dependency for other commands. +_Returns_: An event that is associated with context [code]#c#. The event is in the "signaled" +state. -''' +{note}The event can be passed to the [code]#enqueue_signal_event# function or included in the +[code]#requirements# object (wrapped in the [code]#signal_event# object), and as a dependency for +other commands.{endnote} + +== Interactions between event APIs + +An event [code]#e# created via [code]#make_event# can be used as a command dependency using the APIs +introduced by this extension (e.g. via the requirements object or [code]#enqueue_wait_event#) +or using other APIs (e.g. [code]#handler::depends_on#) for a command submitted to some queue +[code]#q#. It is not necessary for the context of [code]#e# to match the context of [code]#q#. + +An event [code]#e# created via [code]#make_event# can also be used for signaling (using the +[code]#enqueue_signal_event# function or included in the [code]#requirements# object), for a command +submitted to some queue [code]#q#. In this case, the context of [code]#e# and [code]#q# must match. + + +If an event [code]#e# is used as a command dependency for some command [code]#c#, the dependency +is captured at the point when [code]#c# is submitted. It is legal to reassociate the event E to +a new command via enqueue_signal_event even before command [code]#c# completes. Doing so does not +change the dependency for command [code]#c#. + +If another host thread is blocked waiting for event [code]#e# to complete via [code]#event::wait# +or [code]#event::wait_and_throw# when event [code]#e# is reassociated with a new command via +[code]#enqueue_signal_event#, the behavior of the [code]#event::wait# or [code]#event::wait_and_throw# +call is undefined. 
From bfaa2fd6ec1140b85cef1fc812663895e0a38ad2 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 5 Feb 2026 14:47:05 +0000 Subject: [PATCH 71/73] Fix formatting. --- .../sycl_khr_free_function_commands.adoc | 87 ++++++++++--------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 28e79bac0..b330efa26 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -175,9 +175,9 @@ For each type, the [code]#is_requirement# type trait is specialized such that * [code]#wait_events#: The command must not begin executing until all events in the vector are complete. -* [code]#signal_event#: When the command is submitted, the event is immediately disassociated - with any previous command, and its status is set to "submitted". When the command is complete, - the provided event is signaled. +* [code]#signal_event#: When the command is submitted, the event is immediately + disassociated with any previous command, and its status is set to "submitted". + When the command is complete, the provided event is signaled. ''' @@ -662,8 +662,8 @@ _Constraints_: * Any accessor in [code]#Requirements# must have a target of [code]#target::host_task#. -_Preconditions_: The callable function [code]#hostTaskCallable# does not call any function -in the [code]#sycl# namespace. +_Preconditions_: The callable function [code]#hostTaskCallable# does not call +any function in the [code]#sycl# namespace. _Effects_ (1): Equivalent to: + [source,sycl] @@ -1053,16 +1053,16 @@ _Constraints_: * [code]#Requirements# does not contain any accessors. _Effects_: Enqueues a command barrier. -Any commands submitted after this barrier cannot begin execution until the barrier -completes. +Any commands submitted after this barrier cannot begin execution until the +barrier completes. 
The barrier completes when: * All commands previously submitted to this queue have completed; and * All events (wrapped in [code]#wait_event# or [code]#wait_events#) in [code]#reqs# have completed. -if [code]#reqs# contains a [code]#signal_event# requirement, the event is signaled -when the barrier completes. +If [code]#reqs# contains a [code]#signal_event# requirement, the event is +signaled when the barrier completes. {note}If a barrier is submitted to an in-order queue with no requirements, then this operation may be a no-op.{endnote} @@ -1085,10 +1085,11 @@ _Constraints_: * [code]#Requirements# contains the [code]#wait_events# object; or * [code]#Requirements# is empty. -_Effects_: Enqueues a command barrier. The event [code]#e# is immediately disassociated -with any previous command, and its status is set to "submitted". -Any commands submitted after this barrier cannot begin execution until the barrier -completes. +_Effects_: Enqueues a command barrier. +The event [code]#e# is immediately disassociated with any previous command, and +its status is set to "submitted". +Any commands submitted after this barrier cannot begin execution until the +barrier completes. The barrier completes when: * All commands previously submitted to this queue have completed; and @@ -1121,8 +1122,8 @@ _Effects_: Enqueues an event barrier. Any commands submitted after this barrier cannot begin execution until the provided event ([code]#e#) has completed. -Once the above conditions are met, the event (wrapped in signal_event) is signaled, -if provided. +Once the above conditions are met, the event (wrapped in [code]#signal_event#) +is signaled, if provided. ''' @@ -1149,8 +1150,8 @@ _Effects_ (1-2): Enqueues an event barrier. Any commands submitted after this barrier cannot begin execution until the provided events ([code]#evts#) have completed. -Once the above conditions are met, the event (wrapped in [code]#signal_event#) is signaled, -if provided.
+Once the above conditions are met, the event (wrapped in [code]#signal_event#) +is signaled, if provided. ''' @@ -1164,31 +1165,35 @@ event make_event(const context& c); } ---- -_Returns_: An event that is associated with context [code]#c#. The event is in the "signaled" -state. +_Returns_: An event that is associated with context [code]#c#. +The event is in the "signaled" state. -{note}The event can be passed to the [code]#enqueue_signal_event# function or included in the -[code]#requirements# object (wrapped in the [code]#signal_event# object), and as a dependency for -other commands.{endnote} +{note}The event can be passed to the [code]#enqueue_signal_event# function or +included in the [code]#requirements# object (wrapped in the [code]#signal_event# +object), and as a dependency for other commands.{endnote} == Interactions between event APIs -An event [code]#e# created via [code]#make_event# can be used as a command dependency using the APIs -introduced by this extension (e.g. via the requirements object or [code]#enqueue_wait_event#) -or using other APIs (e.g. [code]#handler::depends_on#) for a command submitted to some queue -[code]#q#. It is not necessary for the context of [code]#e# to match the context of [code]#q#. - -An event [code]#e# created via [code]#make_event# can also be used for signaling (using the -[code]#enqueue_signal_event# function or included in the [code]#requirements# object), for a command -submitted to some queue [code]#q#. In this case, the context of [code]#e# and [code]#q# must match. - - -If an event [code]#e# is used as a command dependency for some command [code]#c#, the dependency -is captured at the point when [code]#c# is submitted. It is legal to reassociate the event E to -a new command via enqueue_signal_event even before command [code]#c# completes. Doing so does not -change the dependency for command [code]#c#. 
- -If another host thread is blocked waiting for event [code]#e# to complete via [code]#event::wait# -or [code]#event::wait_and_throw# when event [code]#e# is reassociated with a new command via -[code]#enqueue_signal_event#, the behavior of the [code]#event::wait# or [code]#event::wait_and_throw# -call is undefined. +An event [code]#e# created via [code]#make_event# can be used as a command +dependency using the APIs introduced by this extension (e.g. via the +requirements object or [code]#enqueue_wait_event#) or using other APIs (e.g. +[code]#handler::depends_on#) for a command submitted to some queue [code]#q#. +It is not necessary for the context of [code]#e# to match the context of +[code]#q#. + +An event [code]#e# created via [code]#make_event# can also be used for signaling +(using the [code]#enqueue_signal_event# function or included in the +[code]#requirements# object), for a command submitted to some queue [code]#q#. +In this case, the context of [code]#e# and [code]#q# must match. + + +If an event [code]#e# is used as a command dependency for some command +[code]#c#, the dependency is captured at the point when [code]#c# is submitted. +It is legal to reassociate the event E to a new command via enqueue_signal_event +even before command [code]#c# completes. +Doing so does not change the dependency for command [code]#c#. + +If another host thread is blocked waiting for event [code]#e# to complete via +[code]#event::wait# or [code]#event::wait_and_throw# when event [code]#e# is +reassociated with a new command via [code]#enqueue_signal_event#, the behavior +of the [code]#event::wait# or [code]#event::wait_and_throw# call is undefined. From ea55089574f5da273c3668ac6e8f6e5eb500e57d Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Fri, 6 Feb 2026 09:58:09 +0000 Subject: [PATCH 72/73] Address review comments. 
--- .../sycl_khr_free_function_commands.adoc | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index b330efa26..31e9e9334 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1075,14 +1075,14 @@ this operation may be a no-op.{endnote} namespace sycl::khr { template -void enqueue_signal_event(const queue& q, event& e, const requirements& reqs = {}); +void enqueue_signal_event(const queue& q, const event& e, const requirements& reqs = {}); } ---- _Constraints_: -* [code]#Requirements# contains the [code]#wait_event# object; or -* [code]#Requirements# contains the [code]#wait_events# object; or +* [code]#Requirements# contains only the [code]#wait_event# object; or +* [code]#Requirements# contains only the [code]#wait_events# object; or * [code]#Requirements# is empty. _Effects_: Enqueues a command barrier. @@ -1115,7 +1115,7 @@ void enqueue_wait_event(const queue& q, const event& e, const requirements& evts, const req ---- _Constraints_ (1-2): -* [code]#Requirements# contains the [code]#signal_event# object; or +* [code]#Requirements# contains only the [code]#signal_event# object; or * [code]#Requirements# is empty. _Effects_ (1-2): Enqueues an event barrier. @@ -1174,26 +1174,25 @@ object), and as a dependency for other commands.{endnote} == Interactions between event APIs -An event [code]#e# created via [code]#make_event# can be used as a command +An event _E_ created via [code]#make_event# can be used as a command dependency using the APIs introduced by this extension (e.g. via the requirements object or [code]#enqueue_wait_event#) or using other APIs (e.g. -[code]#handler::depends_on#) for a command submitted to some queue [code]#q#. -It is not necessary for the context of [code]#e# to match the context of -[code]#q#. 
+[code]#handler::depends_on#) for a command submitted to some queue _Q_. +It is not necessary for the context of _E_ to match the context of _Q_. -An event [code]#e# created via [code]#make_event# can also be used for signaling +An event _E_ created via [code]#make_event# can also be used for signaling (using the [code]#enqueue_signal_event# function or included in the -[code]#requirements# object), for a command submitted to some queue [code]#q#. -In this case, the context of [code]#e# and [code]#q# must match. +[code]#requirements# object), for a command submitted to some queue _Q_. +In this case, the context of _E_ and _Q_ must match. -If an event [code]#e# is used as a command dependency for some command -[code]#c#, the dependency is captured at the point when [code]#c# is submitted. -It is legal to reassociate the event E to a new command via enqueue_signal_event -even before command [code]#c# completes. -Doing so does not change the dependency for command [code]#c#. +If an event _E_ is used as a command dependency for some command _C_, +the dependency is captured at the point when _C_ is submitted. +It is legal to reassociate the event _E_ to a new command via enqueue_signal_event +even before command _C_ completes. +Doing so does not change the dependency for command _C_. -If another host thread is blocked waiting for event [code]#e# to complete via -[code]#event::wait# or [code]#event::wait_and_throw# when event [code]#e# is +If another host thread is blocked waiting for event _E_ to complete via +[code]#event::wait# or [code]#event::wait_and_throw# when event _E_ is reassociated with a new command via [code]#enqueue_signal_event#, the behavior of the [code]#event::wait# or [code]#event::wait_and_throw# call is undefined. From 58fc68e5fe1a6a42f9b1312108a2766d69e95bfb Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Fri, 6 Feb 2026 10:06:41 +0000 Subject: [PATCH 73/73] Fix formatting. 
--- .../sycl_khr_free_function_commands.adoc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 31e9e9334..a0212d084 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -1174,9 +1174,9 @@ object), and as a dependency for other commands.{endnote} == Interactions between event APIs -An event _E_ created via [code]#make_event# can be used as a command -dependency using the APIs introduced by this extension (e.g. via the -requirements object or [code]#enqueue_wait_event#) or using other APIs (e.g. +An event _E_ created via [code]#make_event# can be used as a command dependency +using the APIs introduced by this extension (e.g. via the requirements object or +[code]#enqueue_wait_event#) or using other APIs (e.g. [code]#handler::depends_on#) for a command submitted to some queue _Q_. It is not necessary for the context of _E_ to match the context of _Q_. @@ -1186,10 +1186,10 @@ An event _E_ created via [code]#make_event# can also be used for signaling In this case, the context of _E_ and _Q_ must match. -If an event _E_ is used as a command dependency for some command _C_, -the dependency is captured at the point when _C_ is submitted. -It is legal to reassociate the event _E_ to a new command via enqueue_signal_event -even before command _C_ completes. +If an event _E_ is used as a command dependency for some command _C_, the +dependency is captured at the point when _C_ is submitted. +It is legal to reassociate the event _E_ to a new command via +[code]#enqueue_signal_event# even before command _C_ completes. Doing so does not change the dependency for command _C_. If another host thread is blocked waiting for event _E_ to complete via