|
| 1 | +:data-uri: |
| 2 | +:icons: font |
| 3 | +include::../config/attribs.txt[] |
| 4 | +:source-highlighter: coderay |
| 5 | + |
| 6 | += cl_img_memory_management |
| 7 | + |
| 8 | +== Name Strings |
| 9 | + |
| 10 | +`cl_img_memory_management` |
| 11 | + |
| 12 | +== Contact |
| 13 | + |
| 14 | +Imagination Technologies Developer Forum: + |
| 15 | +https://forums.imgtec.com/ |
| 16 | + |
| 17 | +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) |
| 18 | + |
| 19 | +== Contributors |
| 20 | + |
| 21 | +CY Cheng, Imagination Technologies. + |
| 22 | +Tomasz Platek, Imagination Technologies. |
| 23 | + |
| 24 | +== Notice |
| 25 | + |
| 26 | +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. |
| 27 | + |
| 28 | +== Status |
| 29 | + |
| 30 | +Final Draft |
| 31 | + |
| 32 | +== Version |
| 33 | + |
| 34 | +Built On: {docdate} + |
| 35 | +Version: 1.0.0 |
| 36 | + |
| 37 | +== Dependencies |
| 38 | + |
| 39 | +This extension is written against the OpenCL C Specification Version V3.0.16. |
| 40 | + |
| 41 | +== Overview |
| 42 | + |
| 43 | +This extension adds built-in functions that expose the low-level memory and cache control instructions of Imagination GPU IP that are not accessible by standard OpenCL C functions. |
| 44 | + |
| 45 | +== New OpenCL C Feature Names |
| 46 | + |
| 47 | +[source,c] |
| 48 | +---- |
| 49 | +__opencl_img_fence |
| 50 | +__opencl_img_cache |
| 51 | +__opencl_img_load_store |
| 52 | +---- |
| 53 | + |
| 54 | +== New OpenCL C Functions |
| 55 | + |
| 56 | +Issues a data fence: |
| 57 | + |
| 58 | +[source,c] |
| 59 | +---- |
| 60 | +void img_fence(cache_target_img target); |
| 61 | +---- |
| 62 | + |
| 63 | +Perform the cache flush/invalidate operation: |
| 64 | + |
| 65 | +[source,c] |
| 66 | +---- |
| 67 | +void img_cache_flush(cache_target_img target); |
| 68 | +void img_cache_invalidate(cache_target_img target); |
| 69 | +void img_cache_flush_invalidate(cache_target_img target); |
| 70 | +---- |
| 71 | + |
| 72 | +Load to/store from memory: |
| 73 | + |
| 74 | +[source,c] |
| 75 | +---- |
| 76 | +gentype img_load(gentype *p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); |
| 77 | +gentype img_load(const gentype *p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); |
| 78 | +void img_store(gentype *p, gentype value, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); |
| 79 | +---- |
| 80 | + |
| 81 | +== Modifications to the OpenCL C Specification |
| 82 | + |
| 83 | +(Add to Table 4 - Other Built-in Data Types in Section 6.3.3. Other Built-in Data Types) :: |
| 84 | ++ |
| 85 | +[cols=",",options="header",] |
| 86 | +|==== |
| 87 | +| Type | Description |
| 88 | +| `cache_target_img` |
| 89 | + | Target of the cache control functions. Refer to the Low-level Memory and Cache Control Functions section for a detailed description |
| 90 | + of the built-in functions that use this enum. |
| 91 | + |
| 92 | +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. |
| 93 | +| `cache_coherence_img` |
| 94 | + | Level of cache coherence. Refer to the Low-level Memory and Cache Control Functions section for a detailed description |
| 95 | + of the built-in functions that use this enum. |
| 96 | + |
| 97 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 98 | +| `L2_cache_policy_img` |
| 99 | + | Cache policy for the L2 cache. Refer to the Low-level Memory and Cache Control Functions section for a detailed description |
| 100 | + of the built-in functions that use this enum. |
| 101 | + |
| 102 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 103 | +| `cache_persistence_level_img` |
| 104 | + | Level of cache persistence. Refer to the Low-level Memory and Cache Control Functions section for a detailed description |
| 105 | + of the built-in functions that use this enum. |
| 106 | + |
| 107 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 108 | +|==== |
| 109 | + |
| 110 | +(Add a new Section 6.15.22, *Low-level Memory and Cache Control Functions*) :: |
| 111 | ++ |
| 112 | +-- |
| 113 | +The OpenCL C programming language implements the following built-in functions |
| 114 | +to perform low-level memory and cache control operations: |
| 115 | + |
| 116 | +[cols="1,2",options="header"] |
| 117 | +|==== |
| 118 | +| Function | Description |
| 119 | +| void *img_fence*(cache_target_img target) |
| 120 | + a| `img_fence` issues a data fence as far as the specified `target`. For example, using `cache_target_L2_img` issues a data fence for the L1 and L2 caches. |
| 121 | + |
| 122 | +Requires that the `__opencl_img_fence` feature macro is defined. |
| 123 | +| void *img_cache_flush*(cache_target_img target) |
| 124 | + a| `img_cache_flush` flushes cache, `target` determines how far through the memory hierarchy caches are flushed. For example, using `cache_target_L2_img` flushes the L1 and L2 caches. |
| 125 | + |
| 126 | +Requires that the `__opencl_img_cache` feature macro is defined. |
| 127 | +| void *img_cache_invalidate*(cache_target_img target) |
| 128 | + a| `img_cache_invalidate` invalidates cache, `target` determines how far through the memory hierarchy caches are invalidated. For example, using `cache_target_L2_img` invalidates the L1 and L2 caches. |
| 129 | + |
| 130 | +Requires that the `__opencl_img_cache` feature macro is defined. |
| 131 | +| void *img_cache_flush_invalidate*(cache_target_img target) |
| 132 | + a| `img_cache_flush_invalidate` flushes and invalidates cache, `target` determines how far through the memory hierarchy caches are flushed and invalidated. For example, using `cache_target_L2_img` flushes and invalidates the L1 and L2 caches. |
| 133 | + |
| 134 | +Requires that the `__opencl_img_cache` feature macro is defined. |
| 135 | +| gentype *img_load*(gentype pass:[*]p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) + |
| 136 | + gentype *img_load*(const gentype pass:[*]p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) |
| 137 | + a| `img_load` returns sizeof(gentype) bytes of data from `p`, where `coherence` specifies the level of cache coherence, `policy` specifies the cache policy for the L2 cache, `persistence` specifies the level of cache persistence, and `volatile` specifies volatility. |
| 138 | + |
| 139 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 140 | +| void *img_store*(gentype pass:[*]p, gentype value, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) |
| 141 | + a| `img_store` writes 'value' to `p`, where `coherence` specifies the level of cache coherence, `policy` specifies the cache policy for the L2 cache, `persistence` specifies the level of cache persistence, and `volatile` specifies volatility. |
| 142 | + |
| 143 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 144 | +|==== |
| 145 | +-- |
| 146 | + |
| 147 | +=== Cache Target |
| 148 | + |
| 149 | +The enumerated type `cache_target_img` specifies the target of the cache control functions. |
| 150 | +The following table lists the enumeration constants: |
| 151 | + |
| 152 | +[cols=",",options="header",] |
| 153 | +|==== |
| 154 | +| Cache Target | Additional Notes |
| 155 | +| `cache_target_L1_img` |
| 156 | + | Performs the operation on the L1 cache. |
| 157 | + |
| 158 | +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. |
| 159 | +| `cache_target_L2_img` |
| 160 | + | Performs the operation on the L1 and L2 caches. |
| 161 | + |
| 162 | +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. |
| 163 | +| `cache_target_external_img` |
| 164 | + | Performs the operation on the L1, L2, and external caches. |
| 165 | + |
| 166 | +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. |
| 167 | +|==== |
| 168 | + |
| 169 | +=== Cache Persistence |
| 170 | +Cache persistence modifies the priority of the request in the cache where low level means that requests are evicted quickly and high level means that requests remain in cache for a long time. |
| 171 | +The enumerated type `cache_persistence_level_img` specifies the level of cache persistence. |
| 172 | +The following table lists the enumeration constants: |
| 173 | + |
| 174 | +[cols=",",options="header",] |
| 175 | +|==== |
| 176 | +| Cache Persistence | Additional Notes |
| 177 | +| `cache_persistence_level_default_img` |
| 178 | + | `cache_persistence_level_min_img` is the default persistence level. |
| 179 | + |
| 180 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 181 | +| `cache_persistence_level_min_img` |
| 182 | + | Requires that the `__opencl_img_load_store` feature macro is defined. |
| 183 | +| `cache_persistence_level_low_img` |
| 184 | + | Requires that the `__opencl_img_load_store` feature macro is defined. |
| 185 | +| `cache_persistence_level_high_img` |
| 186 | + | Requires that the `__opencl_img_load_store` feature macro is defined. |
| 187 | +| `cache_persistence_level_max_img` |
| 188 | + | Requires that the `__opencl_img_load_store` feature macro is defined. |
| 189 | +|==== |
| 190 | + |
| 191 | +[[cache-coherence]] |
| 192 | +==== Cache Coherence |
| 193 | +The enumerated type `cache_coherence_img` specifies the level of cache coherence. |
| 194 | +The following table lists the enumeration constants: |
| 195 | + |
| 196 | +[cols=",",options="header",] |
| 197 | +|==== |
| 198 | +| Cache Coherence | Additional Notes |
| 199 | +| `cache_coherence_L1_img` |
| 200 | + | Cache coherence is guaranteed at the L1 level. Explicit flush or invalidate may be needed to ensure data coherency at higher levels. |
| 201 | + |
| 202 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 203 | +| `cache_coherence_L2_img` |
| 204 | + | Cache coherence is guaranteed at the L2 level. Explicit flush or invalidate may be needed to ensure data coherency at higher levels. |
| 205 | + |
| 206 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 207 | +|==== |
| 208 | + |
| 209 | +=== L2 Cache Policy |
| 210 | +The enumerated type `L2_cache_policy_img` specifies the cache policy for the L2 cache. |
| 211 | +The following table lists the enumeration constants: |
| 212 | + |
| 213 | +[cols=",",options="header",] |
| 214 | +|==== |
| 215 | +| L2 Cache Policy | Additional Notes |
| 216 | +| `L2_cache_policy_new_alloc_img` |
| 217 | + | Allocates a new cache line on a cache miss. |
| 218 | + |
| 219 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 220 | +| `L2_cache_policy_bypass_img` |
| 221 | + | Permits to bypass the cache and access memory directly. |
| 222 | + |
| 223 | +Requires that the `__opencl_img_load_store` feature macro is defined. |
| 224 | +|==== |
| 225 | + |
| 226 | +== Coding Sample |
| 227 | + |
| 228 | +This coding sample shows how to use the *img_load* and *img_store* functions: |
| 229 | +[source] |
| 230 | +---- |
| 231 | +__kernel void test(__global int *in, __global int *out) { |
| 232 | + int a = img_load(in, cache_coherence_L1_img, L2_cache_policy_new_alloc_img, cache_persistence_level_min_img, true); |
| 233 | + a += 1; |
| 234 | + img_store(out, a, cache_coherence_L1_img, L2_cache_policy_new_alloc_img, cache_persistence_level_min_img, true); |
| 235 | +} |
| 236 | +---- |
| 237 | + |
| 238 | +== Version History |
| 239 | + |
| 240 | +[cols="5,15,15,70"] |
| 241 | +[grid="rows"] |
| 242 | +[options="header"] |
| 243 | +|==== |
| 244 | +| Version | Date | Author | Changes |
| 245 | +| 1.0.0 | 2024-06-19 | Tomasz Platek | *Initial revision* |
| 246 | +|==== |
| 247 | + |
0 commit comments