|
8 | 8 | * See file LICENSE in the project root for full license details. |
9 | 9 | */ |
10 | 10 | // SPDX-License-Identifier: BSD-2-Clause |
| 11 | + |
| 12 | +#define CL_TARGET_OPENCL_VERSION 300 |
| 13 | +#define CL_ENABLE_BETA_EXTENSIONS |
| 14 | + |
11 | 15 | #include "hipSYCL/runtime/error.hpp" |
12 | 16 | #include "hipSYCL/runtime/ocl/ocl_hardware_manager.hpp" |
13 | 17 | #include "hipSYCL/runtime/ocl/ocl_usm.hpp" |
14 | 18 | #include "hipSYCL/runtime/operations.hpp" |
15 | 19 |
|
16 | 20 | #include <CL/opencl.hpp> |
17 | | -#include <CL/cl.h> |
18 | 21 | #include <CL/cl_ext.h> |
19 | 22 | #include <memory> |
20 | 23 |
|
@@ -317,7 +320,242 @@ class ocl_usm_intel_extension : public ocl_usm { |
317 | 320 | bool _is_cpu = false; |
318 | 321 | }; |
319 | 322 |
|
| 323 | +class ocl_usvm_khr : public ocl_usm { |
| 324 | +public: |
| 325 | + ocl_usvm_khr(ocl_hardware_manager *hw_mgr, int device_index, |
| 326 | + const cl::Platform &platform, const cl::Device &dev, |
| 327 | + const cl::Context &ctx) |
| 328 | + : _ctx{ctx}, _dev{dev}, _hw_mgr{hw_mgr}, _device_index{device_index} { |
| 329 | + |
| 330 | + std::string str; |
| 331 | + cl_int err = dev.getInfo(CL_DEVICE_EXTENSIONS, &str); |
| 332 | + |
| 333 | + if (err != CL_SUCCESS || |
| 334 | + (str.find("cl_khr_unified_svm") == std::string::npos)) { |
| 335 | + return; |
| 336 | + } |
| 337 | + |
| 338 | + cl_platform_id id = platform.cl::detail::Wrapper<cl_platform_id>::get(); |
| 339 | + |
| 340 | + initialize_func(_alloc, "clSVMAllocWithPropertiesKHR", id); |
| 341 | + initialize_func(_free, "clSVMFreeWithPropertiesKHR", id); |
| 342 | + initialize_func(_pointer_info, "clGetSVMPointerInfoKHR", id); |
| 343 | + |
| 344 | + // TODO - Update to OpenCL-C++ bindings when available |
| 345 | + size_t size; |
| 346 | + err = clGetDeviceInfo(_dev.get(), CL_DEVICE_SVM_TYPE_CAPABILITIES_KHR, 0, nullptr, |
| 347 | + &size); |
| 348 | + _svm_caps.resize(size / sizeof(cl_svm_capabilities_khr)); |
| 349 | + err = clGetDeviceInfo(_dev.get(), CL_DEVICE_SVM_TYPE_CAPABILITIES_KHR, size, |
| 350 | + _svm_caps.data(), nullptr); |
| 351 | + for (size_t i = 0; i < _svm_caps.size(); i++) { |
| 352 | + if ((_svm_caps[i] & CL_SVM_TYPE_MACRO_SYSTEM_KHR) == |
| 353 | + CL_SVM_TYPE_MACRO_SYSTEM_KHR) { |
| 354 | + _system_svm_type_index = static_cast<int32_t>(i); |
| 355 | + } else if ((_svm_caps[i] & CL_SVM_TYPE_MACRO_DEVICE_KHR) == |
| 356 | + CL_SVM_TYPE_MACRO_DEVICE_KHR) { |
| 357 | + _device_svm_type_index = static_cast<int32_t>(i); |
| 358 | + } else if ((_svm_caps[i] & CL_SVM_TYPE_MACRO_HOST_KHR) == |
| 359 | + CL_SVM_TYPE_MACRO_HOST_KHR) { |
| 360 | + _host_svm_type_index = static_cast<int32_t>(i); |
| 361 | + } else if ((_svm_caps[i] & |
| 362 | + CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) == |
| 363 | + CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) { |
| 364 | + _single_device_shared_svm_type_index = static_cast<int32_t>(i); |
| 365 | + } |
| 366 | + } |
| 367 | + |
| 368 | + // Device must support at least one of these capabilities to use this |
| 369 | + // ocl_usvm_khr implementation, otherwise can can fallback to ocl_usm_svm |
| 370 | + _is_available = (_system_svm_type_index != -1) || |
| 371 | + (_device_svm_type_index != -1) || |
| 372 | + (_host_svm_type_index != -1) || |
| 373 | + (_single_device_shared_svm_type_index != -1); |
| 374 | + } |
| 375 | + |
| 376 | + bool is_available() const override { |
| 377 | + return _is_available; |
| 378 | + } |
| 379 | + |
| 380 | + bool has_usm_device_allocations() const override { |
| 381 | + if(_device_svm_type_index == -1) |
| 382 | + return false; |
| 383 | + const auto& caps = _svm_caps[_device_svm_type_index]; |
| 384 | + return (caps & CL_SVM_TYPE_MACRO_DEVICE_KHR) == CL_SVM_TYPE_MACRO_DEVICE_KHR; |
| 385 | + } |
| 386 | + |
| 387 | + bool has_usm_host_allocations() const override { |
| 388 | + if(_host_svm_type_index == -1) |
| 389 | + return false; |
| 390 | + |
| 391 | + const auto& caps = _svm_caps[_host_svm_type_index]; |
| 392 | + return (caps & CL_SVM_TYPE_MACRO_HOST_KHR) == CL_SVM_TYPE_MACRO_HOST_KHR; |
| 393 | + } |
| 394 | + |
| 395 | + bool has_usm_atomic_host_allocations() const override { |
| 396 | + if(_host_svm_type_index == -1) |
| 397 | + return false; |
| 398 | + |
| 399 | + const auto& caps = _svm_caps[_host_svm_type_index]; |
| 400 | + return caps & CL_SVM_CAPABILITY_DEVICE_ATOMIC_ACCESS_KHR; |
| 401 | + } |
| 402 | + |
| 403 | + bool has_usm_shared_allocations() const override { |
| 404 | + if(_single_device_shared_svm_type_index == -1) |
| 405 | + return false; |
| 406 | + |
| 407 | + const auto& caps = _svm_caps[_single_device_shared_svm_type_index]; |
| 408 | + return (caps & CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) == CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR; |
| 409 | + } |
| 410 | + |
| 411 | + bool has_usm_atomic_shared_allocations() const override { |
| 412 | + if(_single_device_shared_svm_type_index == -1) |
| 413 | + return false; |
| 414 | + |
| 415 | + const auto& caps = _svm_caps[_single_device_shared_svm_type_index]; |
| 416 | + return caps & CL_SVM_CAPABILITY_DEVICE_ATOMIC_ACCESS_KHR; |
| 417 | + } |
| 418 | + |
| 419 | + bool has_usm_system_allocations() const override { |
| 420 | + if (_system_svm_type_index == -1) |
| 421 | + return false; |
| 422 | + |
| 423 | + const auto& caps = _svm_caps[_system_svm_type_index]; |
| 424 | + return (caps & CL_SVM_TYPE_MACRO_SYSTEM_KHR) == CL_SVM_TYPE_MACRO_SYSTEM_KHR; |
| 425 | + } |
| 426 | + |
| 427 | + void* malloc_host(std::size_t size, std::size_t alignment, cl_int& err) override { |
| 428 | + if(!_alloc) { |
| 429 | + err = CL_INVALID_PLATFORM; |
| 430 | + return nullptr; |
| 431 | + } |
| 432 | + |
| 433 | + cl_svm_alloc_properties_khr props[] = {CL_SVM_ALLOC_ALIGNMENT_KHR, alignment, 0}; |
| 434 | + return _alloc(_ctx.get(), props, _host_svm_type_index, size, &err); |
| 435 | + } |
| 436 | + |
| 437 | + void* malloc_device(std::size_t size, std::size_t alignment, cl_int& err) override { |
| 438 | + if(!_alloc) { |
| 439 | + err = CL_INVALID_PLATFORM; |
| 440 | + return nullptr; |
| 441 | + } |
| 442 | + |
| 443 | + cl_svm_alloc_properties_khr props[] = {CL_SVM_ALLOC_ALIGNMENT_KHR, alignment, |
| 444 | + CL_SVM_ALLOC_ASSOCIATED_DEVICE_HANDLE_KHR, |
| 445 | + reinterpret_cast<cl_svm_alloc_properties_khr>(_dev.get()), |
| 446 | + 0}; |
| 447 | + return _alloc(_ctx.get(), props, _device_svm_type_index, size, &err); |
| 448 | + } |
| 449 | + |
| 450 | + void* malloc_shared(std::size_t size, std::size_t alignment, cl_int& err) override { |
| 451 | + if(!_alloc) { |
| 452 | + err = CL_INVALID_PLATFORM; |
| 453 | + return nullptr; |
| 454 | + } |
| 455 | + |
| 456 | + cl_svm_alloc_properties_khr props[] = {CL_SVM_ALLOC_ALIGNMENT_KHR, alignment, |
| 457 | + CL_SVM_ALLOC_ASSOCIATED_DEVICE_HANDLE_KHR, |
| 458 | + reinterpret_cast<cl_svm_alloc_properties_khr>(_dev.get()), |
| 459 | + 0}; |
| 460 | + return _alloc(_ctx.get(), props, _single_device_shared_svm_type_index, size, &err); |
| 461 | + } |
| 462 | + |
| 463 | + cl_int free(void* ptr) override { |
| 464 | + if(!_free) { |
| 465 | + return CL_INVALID_PLATFORM; |
| 466 | + } |
| 467 | + return _free(_ctx.get(), nullptr, 0, ptr); |
| 468 | + } |
| 469 | + |
| 470 | + cl_int get_alloc_info(const void* ptr, pointer_info& out) override { |
| 471 | + if(!_pointer_info) { |
| 472 | + return CL_INVALID_PLATFORM; |
| 473 | + } |
| 474 | + |
| 475 | + out.is_from_host_backend = false; |
| 476 | + out.dev = _hw_mgr->get_device_id(_device_index); |
| 477 | + cl_uint type_index; |
| 478 | + cl_int err = _pointer_info(_ctx.get(), _dev.get(), ptr, CL_SVM_INFO_TYPE_INDEX_KHR, |
| 479 | + sizeof(type_index), &type_index, nullptr); |
| 480 | + if (err != CL_SUCCESS) { |
| 481 | + return err; |
| 482 | + } else if (CL_UINT_MAX == type_index) { |
| 483 | + return CL_INVALID_MEM_OBJECT; |
| 484 | + } |
| 485 | + |
| 486 | + out.is_optimized_host = (type_index == _host_svm_type_index); |
| 487 | + out.is_usm = (type_index == _single_device_shared_svm_type_index); |
| 488 | + |
| 489 | + return CL_SUCCESS; |
| 490 | + } |
| 491 | + |
| 492 | + cl_int enqueue_memcpy(cl::CommandQueue &queue, void *dst, |
| 493 | + const void *src, std::size_t size, |
| 494 | + const std::vector<cl::Event> &wait_events, |
| 495 | + cl::Event *evt_out) override { |
| 496 | + return queue.enqueueMemcpySVM(dst, src, false, size, &wait_events, evt_out); |
| 497 | + } |
| 498 | + |
| 499 | + cl_int enqueue_memset(cl::CommandQueue &queue, void *ptr, |
| 500 | + cl_int pattern, std::size_t bytes, |
| 501 | + const std::vector<cl::Event> &wait_events, |
| 502 | + cl::Event *out) override { |
| 503 | + unsigned char pattern_byte = static_cast<char>(pattern); |
| 504 | + return queue.enqueueMemFillSVM(ptr, pattern_byte, bytes, &wait_events, out); |
| 505 | + } |
320 | 506 |
|
| 507 | + cl_int enqueue_prefetch(cl::CommandQueue &queue, const void *ptr, |
| 508 | + std::size_t bytes, |
| 509 | + cl_mem_migration_flags flags, |
| 510 | + const std::vector<cl::Event> &wait_events, |
| 511 | + cl::Event *event) override { |
| 512 | + // Seems there is a bug in CommandQueue::enqueueMigrateSVM, so we directly |
| 513 | + // call the OpenCL function |
| 514 | + cl_event tmp; |
| 515 | + cl_int err = ::clEnqueueSVMMigrateMem( |
| 516 | + queue.get(), 1, &ptr, &bytes, flags, wait_events.size(), |
| 517 | + (wait_events.size() > 0) ? (cl_event *)&wait_events.front() : nullptr, |
| 518 | + (event != nullptr) ? &tmp : nullptr); |
| 519 | + |
| 520 | + if(event != nullptr && err == CL_SUCCESS) { |
| 521 | + *event = tmp; |
| 522 | + } |
| 523 | + return err; |
| 524 | + } |
| 525 | + |
| 526 | + cl_int enable_indirect_usm_access(cl::Kernel& k) override { |
| 527 | + return k.setExecInfo(CL_KERNEL_EXEC_INFO_SVM_INDIRECT_ACCESS_KHR, cl_bool{true}); |
| 528 | + } |
| 529 | + |
| 530 | +private: |
| 531 | + template <class Func> |
| 532 | + void initialize_func(Func &out, const char *name, cl_platform_id id) { |
| 533 | + out = (Func)clGetExtensionFunctionAddressForPlatform(id, name); |
| 534 | + if (!out) { |
| 535 | + print_error( |
| 536 | + __acpp_here(), |
| 537 | + error_info{"ocl_usvm_khr: Platform advertises cl_khr_unified_svm support, but " |
| 538 | + "extracting function address for " + |
| 539 | + std::string{name} + " failed."}); |
| 540 | + } |
| 541 | + } |
| 542 | + |
| 543 | + bool _is_available = false; |
| 544 | + clSVMFreeWithPropertiesKHR_fn _free = nullptr; |
| 545 | + clSVMAllocWithPropertiesKHR_fn _alloc = nullptr; |
| 546 | + clGetSVMPointerInfoKHR_fn _pointer_info = nullptr; |
| 547 | + |
| 548 | + int32_t _device_svm_type_index = -1; |
| 549 | + int32_t _host_svm_type_index = -1; |
| 550 | + int32_t _single_device_shared_svm_type_index = -1; |
| 551 | + int32_t _system_svm_type_index = -1; |
| 552 | + std::vector<cl_svm_capabilities_khr> _svm_caps; |
| 553 | + |
| 554 | + cl::Context _ctx; |
| 555 | + cl::Device _dev; |
| 556 | + ocl_hardware_manager* _hw_mgr; |
| 557 | + int _device_index; |
| 558 | +}; |
321 | 559 |
|
322 | 560 | class ocl_usm_svm : public ocl_usm { |
323 | 561 | public: |
@@ -496,5 +734,15 @@ ocl_usm::from_fine_grained_system_svm(ocl_hardware_manager* hw_mgr, int dev_id) |
496 | 734 | ctx->get_cl_context()); |
497 | 735 | } |
498 | 736 |
|
| 737 | +std::unique_ptr<ocl_usm> |
| 738 | +ocl_usm::from_usvm_khr(ocl_hardware_manager* hw_mgr, int dev_id) { |
| 739 | + ocl_hardware_context *ctx = |
| 740 | + static_cast<ocl_hardware_context *>(hw_mgr->get_device(dev_id)); |
| 741 | + int platform_id = ctx->get_platform_id(); |
| 742 | + return std::make_unique<ocl_usvm_khr>( |
| 743 | + hw_mgr, dev_id, hw_mgr->get_platform(platform_id), ctx->get_cl_device(), |
| 744 | + ctx->get_cl_context()); |
| 745 | +} |
| 746 | + |
499 | 747 | } |
500 | 748 | } |
0 commit comments