Skip to content

Commit 73ac3e1

Browse files
committed
cl_khr_unified_svm implementation of SYCL SVM
The OpenCL WG is developing the [cl_khr_unified_svm](KhronosGroup/OpenCL-Docs#1282) extension with the explicit goal of being sufficient to implement SYCL USM on top of it. A snapshot of the APIs is currently available in the upstream OpenCL-Headers repo, hence the commit bump to fetch them. This PR adds support for using cl_khr_unified_svm as an AdaptiveCpp OpenCL USM backend. It is tested by using [an emulation layer](https://github.com/bashbaug/SimpleOpenCLSamples/blob/cl_khr_unified_svm/layers/99_svmplusplus/emulate.cpp) on top of an OpenCL CPU implementation with Intel USM extension support, and by running the AdaptiveCpp `usm_tests*` and `queue_tests*` unit tests. As the OpenCL extension APIs are subject to change and there are no non-emulated implementations yet, this PR could stay a draft prototype until the OpenCL extension is finalized.
1 parent cfe723b commit 73ac3e1

5 files changed

Lines changed: 263 additions & 6 deletions

File tree

doc/install-ocl.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@ You will need an OpenCL implementation, and the OpenCL icd loader. The OpenCL li
55
In order to generate correct code, AdaptiveCpp needs to use its own fork of the Khronos LLVM-SPIRV translator hosted at https://github.com/AdaptiveCpp/SPIRV-LLVM-Translator. It will *not* work with the upstream translator. When building, AdaptiveCpp will automatically fetch and build the llvm-spirv translator for the right LLVM version.
66

77
The OpenCL backend can be enabled using `cmake -DWITH_OPENCL_BACKEND=ON` when building AdaptiveCpp.
8-
In order to run code successfully on an OpenCL device, it must support SPIR-V ingestion and the Intel USM (unified shared memory) extension. In a degraded mode, devices supporting OpenCL fine-grained system SVM (shared virtual memory) may work as well.
8+
In order to run code successfully on an OpenCL device, it must support SPIR-V ingestion and either the Intel USM (unified shared memory) or cl_khr_unified_svm extension.
9+
In a degraded mode, devices supporting OpenCL fine-grained system SVM (shared virtual memory) may work as well.
910

include/hipSYCL/runtime/ocl/ocl_usm.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ class ocl_usm {
5858

5959
static std::unique_ptr<ocl_usm> from_intel_extension(ocl_hardware_manager* hw_mgr, int device_index);
6060
static std::unique_ptr<ocl_usm> from_fine_grained_system_svm(ocl_hardware_manager* hw_mgr, int device_index);
61+
static std::unique_ptr<ocl_usm> from_usvm_khr(ocl_hardware_manager* hw_mgr, int device_index);
6162
};
6263

6364

6465
}
6566
}
6667

67-
#endif
68+
#endif

src/runtime/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ if(WITH_OPENCL_BACKEND)
241241
include(FetchContent)
242242
FetchContent_Declare(ocl-headers
243243
GIT_REPOSITORY https://github.com/KhronosGroup/OpenCL-Headers
244-
GIT_TAG 265df85aec478d14a5c5880d7bb92d7dd52714ef
244+
GIT_TAG 6137cfbbc7938cd43069d45c622022572fb87113
245245
)
246246

247247
FetchContent_MakeAvailable(ocl-headers)

src/runtime/ocl/ocl_hardware_manager.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -586,13 +586,20 @@ cl::Context ocl_hardware_context::get_cl_context() const {
586586
}
587587

588588
void ocl_hardware_context::init_allocator(ocl_hardware_manager *mgr) {
589-
_usm_provider = ocl_usm::from_intel_extension(mgr, _dev_id);
589+
// Priority of USM providers:
590+
// 1) cl_khr_unified_svm extension
591+
// 2) cl_intel_unified_shared_memory extension
592+
// 3) fine-grained system SVM
593+
_usm_provider = ocl_usm::from_usvm_khr(mgr, _dev_id);
594+
if(!_usm_provider->is_available()) {
595+
_usm_provider = ocl_usm::from_intel_extension(mgr, _dev_id);
596+
}
590597
if(!_usm_provider->is_available()) {
591598
// Try SVM fine-grained system as an alternative
592599
_usm_provider = ocl_usm::from_fine_grained_system_svm(mgr, _dev_id);
593600
if(_usm_provider->is_available()) {
594601
HIPSYCL_DEBUG_WARNING << "OpenCL device " << get_device_name()
595-
<< " does not support Intel USM extensions; "
602+
<< " does not support Intel USM or KHR USVM extensions; "
596603
"falling back to fine-grained system SVM. USM "
597604
"pointer info queries have limited support."
598605
<< std::endl;

src/runtime/ocl/ocl_usm.cpp

Lines changed: 249 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
* See file LICENSE in the project root for full license details.
99
*/
1010
// SPDX-License-Identifier: BSD-2-Clause
11+
12+
#define CL_TARGET_OPENCL_VERSION 300
13+
#define CL_ENABLE_BETA_EXTENSIONS
14+
1115
#include "hipSYCL/runtime/error.hpp"
1216
#include "hipSYCL/runtime/ocl/ocl_hardware_manager.hpp"
1317
#include "hipSYCL/runtime/ocl/ocl_usm.hpp"
1418
#include "hipSYCL/runtime/operations.hpp"
1519

1620
#include <CL/opencl.hpp>
17-
#include <CL/cl.h>
1821
#include <CL/cl_ext.h>
1922
#include <memory>
2023

@@ -317,7 +320,242 @@ class ocl_usm_intel_extension : public ocl_usm {
317320
bool _is_cpu = false;
318321
};
319322

323+
class ocl_usvm_khr : public ocl_usm {
324+
public:
325+
ocl_usvm_khr(ocl_hardware_manager *hw_mgr, int device_index,
326+
const cl::Platform &platform, const cl::Device &dev,
327+
const cl::Context &ctx)
328+
: _ctx{ctx}, _dev{dev}, _hw_mgr{hw_mgr}, _device_index{device_index} {
329+
330+
std::string str;
331+
cl_int err = dev.getInfo(CL_DEVICE_EXTENSIONS, &str);
332+
333+
if (err != CL_SUCCESS ||
334+
(str.find("cl_khr_unified_svm") == std::string::npos)) {
335+
return;
336+
}
337+
338+
cl_platform_id id = platform.cl::detail::Wrapper<cl_platform_id>::get();
339+
340+
initialize_func(_alloc, "clSVMAllocWithPropertiesKHR", id);
341+
initialize_func(_free, "clSVMFreeWithPropertiesKHR", id);
342+
initialize_func(_pointer_info, "clGetSVMPointerInfoKHR", id);
343+
344+
// TODO - Update to OpenCL-C++ bindings when available
345+
size_t size;
346+
err = clGetDeviceInfo(_dev.get(), CL_DEVICE_SVM_TYPE_CAPABILITIES_KHR, 0, nullptr,
347+
&size);
348+
_svm_caps.resize(size / sizeof(cl_svm_capabilities_khr));
349+
err = clGetDeviceInfo(_dev.get(), CL_DEVICE_SVM_TYPE_CAPABILITIES_KHR, size,
350+
_svm_caps.data(), nullptr);
351+
for (size_t i = 0; i < _svm_caps.size(); i++) {
352+
if ((_svm_caps[i] & CL_SVM_TYPE_MACRO_SYSTEM_KHR) ==
353+
CL_SVM_TYPE_MACRO_SYSTEM_KHR) {
354+
_system_svm_type_index = static_cast<int32_t>(i);
355+
} else if ((_svm_caps[i] & CL_SVM_TYPE_MACRO_DEVICE_KHR) ==
356+
CL_SVM_TYPE_MACRO_DEVICE_KHR) {
357+
_device_svm_type_index = static_cast<int32_t>(i);
358+
} else if ((_svm_caps[i] & CL_SVM_TYPE_MACRO_HOST_KHR) ==
359+
CL_SVM_TYPE_MACRO_HOST_KHR) {
360+
_host_svm_type_index = static_cast<int32_t>(i);
361+
} else if ((_svm_caps[i] &
362+
CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) ==
363+
CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) {
364+
_single_device_shared_svm_type_index = static_cast<int32_t>(i);
365+
}
366+
}
367+
368+
// Device must support at least one of these capabilities to use this
369+
// ocl_usvm_khr implementation, otherwise can can fallback to ocl_usm_svm
370+
_is_available = (_system_svm_type_index != -1) ||
371+
(_device_svm_type_index != -1) ||
372+
(_host_svm_type_index != -1) ||
373+
(_single_device_shared_svm_type_index != -1);
374+
}
375+
376+
bool is_available() const override {
377+
return _is_available;
378+
}
379+
380+
bool has_usm_device_allocations() const override {
381+
if(_device_svm_type_index == -1)
382+
return false;
383+
const auto& caps = _svm_caps[_device_svm_type_index];
384+
return (caps & CL_SVM_TYPE_MACRO_DEVICE_KHR) == CL_SVM_TYPE_MACRO_DEVICE_KHR;
385+
}
386+
387+
bool has_usm_host_allocations() const override {
388+
if(_host_svm_type_index == -1)
389+
return false;
390+
391+
const auto& caps = _svm_caps[_host_svm_type_index];
392+
return (caps & CL_SVM_TYPE_MACRO_HOST_KHR) == CL_SVM_TYPE_MACRO_HOST_KHR;
393+
}
394+
395+
bool has_usm_atomic_host_allocations() const override {
396+
if(_host_svm_type_index == -1)
397+
return false;
398+
399+
const auto& caps = _svm_caps[_host_svm_type_index];
400+
return caps & CL_SVM_CAPABILITY_DEVICE_ATOMIC_ACCESS_KHR;
401+
}
402+
403+
bool has_usm_shared_allocations() const override {
404+
if(_single_device_shared_svm_type_index == -1)
405+
return false;
406+
407+
const auto& caps = _svm_caps[_single_device_shared_svm_type_index];
408+
return (caps & CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) == CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR;
409+
}
410+
411+
bool has_usm_atomic_shared_allocations() const override {
412+
if(_single_device_shared_svm_type_index == -1)
413+
return false;
414+
415+
const auto& caps = _svm_caps[_single_device_shared_svm_type_index];
416+
return caps & CL_SVM_CAPABILITY_DEVICE_ATOMIC_ACCESS_KHR;
417+
}
418+
419+
bool has_usm_system_allocations() const override {
420+
if (_system_svm_type_index == -1)
421+
return false;
422+
423+
const auto& caps = _svm_caps[_system_svm_type_index];
424+
return (caps & CL_SVM_TYPE_MACRO_SYSTEM_KHR) == CL_SVM_TYPE_MACRO_SYSTEM_KHR;
425+
}
426+
427+
void* malloc_host(std::size_t size, std::size_t alignment, cl_int& err) override {
428+
if(!_alloc) {
429+
err = CL_INVALID_PLATFORM;
430+
return nullptr;
431+
}
432+
433+
cl_svm_alloc_properties_khr props[] = {CL_SVM_ALLOC_ALIGNMENT_KHR, alignment, 0};
434+
return _alloc(_ctx.get(), props, _host_svm_type_index, size, &err);
435+
}
436+
437+
void* malloc_device(std::size_t size, std::size_t alignment, cl_int& err) override {
438+
if(!_alloc) {
439+
err = CL_INVALID_PLATFORM;
440+
return nullptr;
441+
}
442+
443+
cl_svm_alloc_properties_khr props[] = {CL_SVM_ALLOC_ALIGNMENT_KHR, alignment,
444+
CL_SVM_ALLOC_ASSOCIATED_DEVICE_HANDLE_KHR,
445+
reinterpret_cast<cl_svm_alloc_properties_khr>(_dev.get()),
446+
0};
447+
return _alloc(_ctx.get(), props, _device_svm_type_index, size, &err);
448+
}
449+
450+
void* malloc_shared(std::size_t size, std::size_t alignment, cl_int& err) override {
451+
if(!_alloc) {
452+
err = CL_INVALID_PLATFORM;
453+
return nullptr;
454+
}
455+
456+
cl_svm_alloc_properties_khr props[] = {CL_SVM_ALLOC_ALIGNMENT_KHR, alignment,
457+
CL_SVM_ALLOC_ASSOCIATED_DEVICE_HANDLE_KHR,
458+
reinterpret_cast<cl_svm_alloc_properties_khr>(_dev.get()),
459+
0};
460+
return _alloc(_ctx.get(), props, _single_device_shared_svm_type_index, size, &err);
461+
}
462+
463+
cl_int free(void* ptr) override {
464+
if(!_free) {
465+
return CL_INVALID_PLATFORM;
466+
}
467+
return _free(_ctx.get(), nullptr, 0, ptr);
468+
}
469+
470+
cl_int get_alloc_info(const void* ptr, pointer_info& out) override {
471+
if(!_pointer_info) {
472+
return CL_INVALID_PLATFORM;
473+
}
474+
475+
out.is_from_host_backend = false;
476+
out.dev = _hw_mgr->get_device_id(_device_index);
477+
cl_uint type_index;
478+
cl_int err = _pointer_info(_ctx.get(), _dev.get(), ptr, CL_SVM_INFO_TYPE_INDEX_KHR,
479+
sizeof(type_index), &type_index, nullptr);
480+
if (err != CL_SUCCESS) {
481+
return err;
482+
} else if (CL_UINT_MAX == type_index) {
483+
return CL_INVALID_MEM_OBJECT;
484+
}
485+
486+
out.is_optimized_host = (type_index == _host_svm_type_index);
487+
out.is_usm = (type_index == _single_device_shared_svm_type_index);
488+
489+
return CL_SUCCESS;
490+
}
491+
492+
cl_int enqueue_memcpy(cl::CommandQueue &queue, void *dst,
493+
const void *src, std::size_t size,
494+
const std::vector<cl::Event> &wait_events,
495+
cl::Event *evt_out) override {
496+
return queue.enqueueMemcpySVM(dst, src, false, size, &wait_events, evt_out);
497+
}
498+
499+
cl_int enqueue_memset(cl::CommandQueue &queue, void *ptr,
500+
cl_int pattern, std::size_t bytes,
501+
const std::vector<cl::Event> &wait_events,
502+
cl::Event *out) override {
503+
unsigned char pattern_byte = static_cast<char>(pattern);
504+
return queue.enqueueMemFillSVM(ptr, pattern_byte, bytes, &wait_events, out);
505+
}
320506

507+
cl_int enqueue_prefetch(cl::CommandQueue &queue, const void *ptr,
508+
std::size_t bytes,
509+
cl_mem_migration_flags flags,
510+
const std::vector<cl::Event> &wait_events,
511+
cl::Event *event) override {
512+
// Seems there is a bug in CommandQueue::enqueueMigrateSVM, so we directly
513+
// call the OpenCL function
514+
cl_event tmp;
515+
cl_int err = ::clEnqueueSVMMigrateMem(
516+
queue.get(), 1, &ptr, &bytes, flags, wait_events.size(),
517+
(wait_events.size() > 0) ? (cl_event *)&wait_events.front() : nullptr,
518+
(event != nullptr) ? &tmp : nullptr);
519+
520+
if(event != nullptr && err == CL_SUCCESS) {
521+
*event = tmp;
522+
}
523+
return err;
524+
}
525+
526+
cl_int enable_indirect_usm_access(cl::Kernel& k) override {
527+
return k.setExecInfo(CL_KERNEL_EXEC_INFO_SVM_INDIRECT_ACCESS_KHR, cl_bool{true});
528+
}
529+
530+
private:
531+
template <class Func>
532+
void initialize_func(Func &out, const char *name, cl_platform_id id) {
533+
out = (Func)clGetExtensionFunctionAddressForPlatform(id, name);
534+
if (!out) {
535+
print_error(
536+
__acpp_here(),
537+
error_info{"ocl_usvm_khr: Platform advertises cl_khr_unified_svm support, but "
538+
"extracting function address for " +
539+
std::string{name} + " failed."});
540+
}
541+
}
542+
543+
bool _is_available = false;
544+
clSVMFreeWithPropertiesKHR_fn _free = nullptr;
545+
clSVMAllocWithPropertiesKHR_fn _alloc = nullptr;
546+
clGetSVMPointerInfoKHR_fn _pointer_info = nullptr;
547+
548+
int32_t _device_svm_type_index = -1;
549+
int32_t _host_svm_type_index = -1;
550+
int32_t _single_device_shared_svm_type_index = -1;
551+
int32_t _system_svm_type_index = -1;
552+
std::vector<cl_svm_capabilities_khr> _svm_caps;
553+
554+
cl::Context _ctx;
555+
cl::Device _dev;
556+
ocl_hardware_manager* _hw_mgr;
557+
int _device_index;
558+
};
321559

322560
class ocl_usm_svm : public ocl_usm {
323561
public:
@@ -496,5 +734,15 @@ ocl_usm::from_fine_grained_system_svm(ocl_hardware_manager* hw_mgr, int dev_id)
496734
ctx->get_cl_context());
497735
}
498736

737+
std::unique_ptr<ocl_usm>
738+
ocl_usm::from_usvm_khr(ocl_hardware_manager* hw_mgr, int dev_id) {
739+
ocl_hardware_context *ctx =
740+
static_cast<ocl_hardware_context *>(hw_mgr->get_device(dev_id));
741+
int platform_id = ctx->get_platform_id();
742+
return std::make_unique<ocl_usvm_khr>(
743+
hw_mgr, dev_id, hw_mgr->get_platform(platform_id), ctx->get_cl_device(),
744+
ctx->get_cl_context());
745+
}
746+
499747
}
500748
}

0 commit comments

Comments
 (0)