@@ -347,7 +347,9 @@ cl_int clEnqueueTranslateToTensor(
347347
348348* _mem_pitch_ defines the length of each dimension in elements to be
349349 used for the memory region of _buffer_ or _host_ptr_. The length of
350- the array must be at least the rank of _tensor_ minus one.
350+ the array must be at least the rank of _tensor_ minus one. if
351+ _mem_pitch_ is NULL or _mem_pitch_[i] is zero, _mem_pitch_[i] is
352+ computed as _region_[i + 1].
351353
352354* _buffer_ and _host_ptr_ refer to a valid buffer object / host
353355 allocation where data is to be read into or to be written from.
@@ -408,7 +410,8 @@ follows in pseudo C code:
408410size_t pitch(size_t dim) {
409411 size_t pitch = 1;
410412 for (size_t i = dim; i < tensor_rank - 1; i++)
411- pitch *= mem_pitch != NULL ? mem_pitch[i] : region[i + 1];
413+ pitch *=
414+ (mem_pitch != NULL || mem_pitch[i] == 0) ? mem_pitch[i] : region[i + 1];
412415 return pitch;
413416}
414417----
@@ -418,11 +421,131 @@ an abstract function that accesses a tensor element in its storage at
418421given coordinate. The method how the coordinates translate to tensor
419422storage addresses is unspecified.
420423
424+ *clEnqueueTranslateFsomTensor* and *clEnqueueTranslateToTensor*
425+ returns CL_SUCCESS if the function is executed
426+ successfully. Otherwise, it returns one of the following errors:
427+
428+ * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid host
429+ command-queue.
430+
431+ * CL_INVALID_CONTEXT if the context associated with _command_queue_
432+ and buffer are not the same or if the context associated with
433+ _command_queue_ and events in _event_wait_list_ are not the same.
434+
435+ * CL_INVALID_MEM_OBJECT if _buffer_ is not a valid buffer object.
436+
437+ * CL_INVALID_VALUE if _tensor_origin_ or _mem_origin_ is NULL.
438+
439+ * CL_INVALID_VALUE if the region being read or written specified by
440+ (_mem_origin_, _region_, _mem_pitch_) is out of bounds.
441+
442+ * CL_INVALID_VALUE if any _region_ array element is 0.
443+
444+ * CL_INVALID_VALUE if _mem_pitch_ is not NULL and _mem_pitch_[i] is
445+ not 0 and _mem_pitch_[i] is less than _region_[i].
446+
447+ * CL_INVALID_VALUE if _buffer_ and _host_ptr_ both are NULL or non-NULL.
448+
449+ * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is NULL and
450+ _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and
451+ _num_events_in_wait_list_ is 0, or if event objects in
452+ _event_wait_list_ are not valid events.
453+
454+ * CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write
455+ operations are blocking and the execution status of any of the
456+ events in _event_wait_list_ is a negative integer value.
457+
458+ * CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
459+ memory for data store associated with memory object the _tensor_ is
460+ bound to.
461+
462+ * CL_OUT_OF_RESOURCES if there is a failure to allocate resources
463+ required by the OpenCL implementation on the device.
464+
465+ * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
466+ required by the OpenCL implementation on the host.
467+
421468// TODO: add clEnqueueCopyTensor
422469
423470// TODO: add clEnqueueFillTensor?
424471
425- TODO: add command buffer variants for clEnqueue*Tensor.
472+ If *cl_khr_command_buffer* is is supported, then the following command
473+ buffer counterparts of the *clEnqueueTranslateFromTensor* and
474+ *clEnqueueTranslateToTensor* commands are available.
475+
476+ [source,c]
477+ ----
478+ cl_int clCommandTranslateFromTensorKHR(
479+ cl_command_buffer_khr command_buffer,
480+ cl_command_queue command_queue,
481+ cl_tensor tensor,
482+ const size_t* tensor_origin,
483+ const size_t* mem_origin,
484+ const size_t* region,
485+ const size_t* mem_pitch,
486+ cl_mem buffer,
487+ void* host_ptr,
488+ cl_uint num_sync_points_in_wait_list,
489+ const cl_sync_point_khr* sync_point_wait_list,
490+ cl_sync_point_khr* sync_point,
491+ cl_mutable_command_khr* mutable_handle);
492+ ----
493+
494+ [source,c]
495+ ----
496+ cl_int clCommandTranslateToTensorKHR(
497+ cl_command_buffer_khr command_buffer,
498+ cl_command_queue command_queue,
499+ cl_tensor tensor,
500+ const size_t* tensor_origin,
501+ const size_t* mem_origin,
502+ const size_t* region,
503+ const size_t* mem_pitch,
504+ cl_mem buffer,
505+ const void* host_ptr,
506+ cl_uint num_sync_points_in_wait_list,
507+ const cl_sync_point_khr* sync_point_wait_list,
508+ cl_sync_point_khr* sync_point,
509+ cl_mutable_command_khr* mutable_handle);
510+ ----
511+
512+ * _command_buffer_ refers to valid command-buffer object.
513+
514+ * For _command_queue_, _tensor_, _tensor_origin_, _mem_origin_,
515+ _region_, _mem_pitch_, _buffer_ and _host_ptr_ parameters refer to
516+ *clEnqueueTranslateFromTensor*.
517+
518+ * For _num_sync_points_in_wait_list_, _sync_point_wait_list_,
519+ _sync_point_, _mutable_handle_ parameters refer to
520+ *clCommandCopyBufferKHR*.
521+
522+ *clCommandTranslateFromTensorKHR* and *clCommandTranslateFromTensorKHR*
523+ returns CL_SUCCESS if the function is executed
524+ successfully. Otherwise, it returns one of the following errors:
525+
526+ * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not NULL.
527+
528+ * CL_INVALID_COMMAND_BUFFER_KHR if _command_buffer_ is not a valid
529+ command-buffer.
530+
531+ * CL_INVALID_CONTEXT if the context associated with _command_queue_
532+ and _command_buffer_ is not the same.
533+
534+ * CL_INVALID_OPERATION if _command_buffer_ has been finalized.
535+
536+ * CL_INVALID_VALUE if _mutable_handle_ is not NULL.
537+
538+ * CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if _sync_point_wait_list_ is
539+ NULL and _num_sync_points_in_wait_list_ is > 0, or
540+ _sync_point_wait_list_ is not NULL and _num_sync_points_in_wait_list_ is
541+ 0, or if synchronization-point objects in _sync_point_wait_list_ are
542+ not valid synchronization-points.
543+
544+ * CL_OUT_OF_RESOURCES if there is a failure to allocate resources
545+ required by the OpenCL implementation on the device.
546+
547+ * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
548+ required by the OpenCL implementation on the host.
426549
427550==== Add New Buffer Property in Section 5.2.1
428551
@@ -580,17 +703,17 @@ std::vector<float> out_data(b * m * n);
580703// optimal data layout.
581704clEnqueueTranslateToTensor(
582705 cmd_q, in0, false, {0, 0, 0}, {0, 0, 0}, {b, m, k},
583- nullptr, nullptr, nullptr, in0_data.data(), 0, nullptr, nullptr);
706+ nullptr, nullptr, in0_data.data(), 0, nullptr, nullptr);
584707clEnqueueTranslateToTensor(
585708 cmd_q, in1, false, {0, 0, 0}, {0, 0, 0}, {b, k, n},
586- nullptr, nullptr, nullptr, in1_data.data(), 0, nullptr, nullptr);
709+ nullptr, nullptr, in1_data.data(), 0, nullptr, nullptr);
587710clEnqueueNDRangeKernel(
588711 cmd_q, matmul_kernel, 3, matmul_grid, nullptr, nullptr, 0, nullptr, nullptr);
589712clEnqueueNDRangeKernel(
590713 cmd_q, add_kernel, 3, add_grid, nullptr, nullptr, 0, nullptr, nullptr);
591714clEnqueueTranslateFromTensor(
592715 cmd_q, out, false, {0, 0, 0}, {0, 0, 0}, {b, m, n},
593- nullptr, nullptr, nullptr, out_data.data(), 0, nullptr, nullptr);
716+ nullptr, nullptr, out_data.data(), 0, nullptr, nullptr);
594717----
595718
596719An example use of tensors in a command buffer when cl_khr_command_buffer
@@ -642,10 +765,10 @@ cl_command_buffer_khr cb =
642765cl_sync_point_khr in0_syncp, in1_syncp, matmul_syncp, add_syncp;
643766clCommandTranslateToTensorKHR(
644767 cmd_b, cmd_q, in0, {0, 0, 0}, {0, 0, 0}, {b, m, k},
645- nullptr, nullptr, nullptr, in0_data.data(), 0, nullptr, &in0_syncp);
768+ nullptr, nullptr, in0_data.data(), 0, nullptr, &in0_syncp);
646769clCommandTranslateToTensorKHR(
647770 cmd_b, cmd_q, in1, {0, 0, 0}, {0, 0, 0}, {b, k, m},
648- nullptr, nullptr, nullptr, in1_data.data(), 0, nullptr, &in1_syncp);
771+ nullptr, nullptr, in1_data.data(), 0, nullptr, &in1_syncp);
649772clCommandNDRangeKernelKHR(
650773 cmd_b, cmd_q, nullptr, matmul_kernel, 3, matmul_grid, nullptr, nullptr,
651774 2, {in0_syncp, in2_syncp}, &matmul_syncp, nullptr);
@@ -654,7 +777,7 @@ clCommandNDRangeKernelKHR(
654777 1, {matmul_syncp}, &add_syncp, nullptr);
655778clCommandTranslateFromTensorKHR(
656779 cmd_b, cmd_q, out, {0, 0, 0}, {0, 0, 0}, {b, k, m},
657- nullptr, nullptr, nullptr, out_data.data(), 1, {add_syncp}, nullptr);
780+ nullptr, nullptr, out_data.data(), 1, {add_syncp}, nullptr);
658781
659782// Finalize the command buffer. At this point the OpenCL
660783// implementation may reserve enough storage for all the tensor
0 commit comments