This repository was archived by the owner on Sep 15, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 118
Expand file tree
/
Copy pathLoopUnrollDynamicIndexing.lgc
More file actions
129 lines (116 loc) · 7.76 KB
/
LoopUnrollDynamicIndexing.lgc
File metadata and controls
129 lines (116 loc) · 7.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this software and associated documentation files (the "Software"), to
; deal in the Software without restriction, including without limitation the
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
; sell copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in all
; copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Test driver. The command below feeds this file to lgc on stdin for gfx1030,
; discards the output file (-o /dev/null), merges stderr into stdout, and hands
; the resulting text to FileCheck, which matches it against the patterns that
; follow.
; NOTE(review): -v is presumably lgc's verbose switch that makes it print the
; "LGC before-lowering results" IR dump the patterns anchor on -- confirm.
; RUN: lgc -mcpu=gfx1030 -v -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s
; CHECK-LABEL: {{^// LLPC}} LGC before-lowering results
; Expected after that banner: a self-referential loop-ID node with two extra
; operands, the first being an unroll count of 11 and the second the
; llvm.loop.disable_nonforced marker.
; CHECK: [[LOOP9:![0-9]+]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
; CHECK: [[META10]] = !{!"llvm.loop.unroll.count", i32 11}
; CHECK: [[META11]] = !{!"llvm.loop.disable_nonforced"}
; NOTE(review): the feature name required below looks like a deliberate
; placeholder that no lit configuration defines, which would keep this test
; from ever being executed -- confirm whether that is still intended.
; REQUIRES: do-not-run-me
; Module-level header for the test input: source name, data layout and target
; triple. The "A5" component of the data layout makes address space 5 the
; alloca address space, which is why the alloca in @main is addrspace(5).
; ModuleID = 'llpc_fragment_7'
source_filename = "llpc_fragment_7"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32-p64:32:32-p65:32:32"
target triple = "amdgcn--amdpal"
; Shader entry point used as the test input.
;
; Shape of the function:
;   * a counted loop with induction variable %i.0 running 0..9,
;   * each iteration fills a 10-element array of <4 x float> with values read
;     through lgc.create.read.generic.input.f32,
;   * then loads lane 0 of element %i.0 of that array (an index that is only
;     known per iteration, i.e. dynamic indexing) and accumulates it,
;   * after the loop the accumulated value is written out as (sum, 0, 0, 1).
; The loop back-edge carries the loop-ID metadata !2.
; Function Attrs: alwaysinline nounwind
define dllexport spir_func void @main() local_unnamed_addr #0 !spirv.ExecutionModel !0 !lgc.shaderstage !1 {
.entry:
; Scratch array: ten <4 x float> slots (10 * 16 = 160 bytes) in addrspace(5).
%0 = alloca [10 x <4 x float>], align 16, addrspace(5)
br label %1
; Loop header. %s.0 is the running accumulator vector (zero on entry) and
; %i.0 the induction variable (0 on entry, %33 from the back-edge).
1: ; preds = %3, %.entry
%s.0 = phi <4 x float> [ zeroinitializer, %.entry ], [ %s.0.vec.insert, %3 ]
%i.0 = phi i32 [ 0, %.entry ], [ %33, %3 ]
; Continue while i < 10 (signed compare); otherwise leave the loop to %34.
%2 = icmp slt i32 %i.0, 10
br i1 %2, label %3, label %34
; Loop body.
3: ; preds = %1
; Build the <2 x float> value (i, (i & 15) - 8) * 0.0625 that is passed as the
; last operand of every input read below.
%4 = and i32 %i.0, 15
%5 = add nsw i32 %4, -8
%6 = insertelement <2 x i32> poison, i32 %i.0, i64 0
%7 = insertelement <2 x i32> %6, i32 %5, i64 1
%8 = sitofp <2 x i32> %7 to <2 x float>
%9 = fmul reassoc nnan nsz arcp contract afn <2 x float> %8, splat (float 6.250000e-02)
; Ten scalar input reads that differ only in their second i32 operand (0..9).
; Each result is placed in lane 0 of an otherwise-poison <4 x float>.
; NOTE(review): the meaning of the individual operands (location, offset,
; interpolation info, ...) is defined by the LGC builder interface and is not
; visible in this file -- confirm there before changing any of them.
%10 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 0, i32 0, i32 0, i32 144, <2 x float> %9)
%11 = insertelement <4 x float> poison, float %10, i64 0
%12 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 1, i32 0, i32 0, i32 144, <2 x float> %9)
%13 = insertelement <4 x float> poison, float %12, i64 0
%14 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 2, i32 0, i32 0, i32 144, <2 x float> %9)
%15 = insertelement <4 x float> poison, float %14, i64 0
%16 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 3, i32 0, i32 0, i32 144, <2 x float> %9)
%17 = insertelement <4 x float> poison, float %16, i64 0
%18 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 4, i32 0, i32 0, i32 144, <2 x float> %9)
%19 = insertelement <4 x float> poison, float %18, i64 0
%20 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 5, i32 0, i32 0, i32 144, <2 x float> %9)
%21 = insertelement <4 x float> poison, float %20, i64 0
%22 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 6, i32 0, i32 0, i32 144, <2 x float> %9)
%23 = insertelement <4 x float> poison, float %22, i64 0
%24 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 7, i32 0, i32 0, i32 144, <2 x float> %9)
%25 = insertelement <4 x float> poison, float %24, i64 0
%26 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 8, i32 0, i32 0, i32 144, <2 x float> %9)
%27 = insertelement <4 x float> poison, float %26, i64 0
%28 = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.read.generic.input.f32(i32 0, i32 9, i32 0, i32 0, i32 144, <2 x float> %9)
%29 = insertelement <4 x float> poison, float %28, i64 0
; Store the ten vectors into consecutive 16-byte slots of %0
; (byte offsets 0, 16, 32, ..., 144).
store <4 x float> %11, ptr addrspace(5) %0, align 16
%.fca.1.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 16
store <4 x float> %13, ptr addrspace(5) %.fca.1.gep, align 16
%.fca.2.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 32
store <4 x float> %15, ptr addrspace(5) %.fca.2.gep, align 16
%.fca.3.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 48
store <4 x float> %17, ptr addrspace(5) %.fca.3.gep, align 16
%.fca.4.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 64
store <4 x float> %19, ptr addrspace(5) %.fca.4.gep, align 16
%.fca.5.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 80
store <4 x float> %21, ptr addrspace(5) %.fca.5.gep, align 16
%.fca.6.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 96
store <4 x float> %23, ptr addrspace(5) %.fca.6.gep, align 16
%.fca.7.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 112
store <4 x float> %25, ptr addrspace(5) %.fca.7.gep, align 16
%.fca.8.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 128
store <4 x float> %27, ptr addrspace(5) %.fca.8.gep, align 16
%.fca.9.gep = getelementptr inbounds nuw i8, ptr addrspace(5) %0, i32 144
store <4 x float> %29, ptr addrspace(5) %.fca.9.gep, align 16
; Dynamically indexed read: address lane 0 of array element %i.0, load it and
; add it to lane 0 of the accumulator.
%s.0.vec.extract = extractelement <4 x float> %s.0, i64 0
%30 = getelementptr [10 x <4 x float>], ptr addrspace(5) %0, i32 0, i32 %i.0, i32 0
%31 = load float, ptr addrspace(5) %30, align 4
%32 = fadd reassoc nnan nsz arcp contract afn float %s.0.vec.extract, %31
%s.0.vec.insert = insertelement <4 x float> %s.0, float %32, i64 0
; i += 1, then take the back-edge, which is tagged with loop ID !2.
%33 = add i32 %i.0, 1
br label %1, !llvm.loop !2
; Loop exit. Shuffle index 0 selects lane 0 of %s.0 and indices 5..7 select
; lanes 1..3 of the constant operand, giving (sum, 0.0, 0.0, 1.0), which is
; then passed to lgc.create.write.generic.output.
34: ; preds = %1
%__llpc_input_proxy_frag_color.12.vec.insert = shufflevector <4 x float> %s.0, <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
call void (...) @lgc.create.write.generic.output(<4 x float> %__llpc_input_proxy_frag_color.12.vec.insert, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison)
ret void
}
; Declarations of the variadic lgc.create.* functions. Each one carries an
; !lgc.create.opcode metadata node: !3 for both read.generic.input variants and
; !4 for write.generic.output.
; Scalar float input read; this is the variant @main calls.
; Function Attrs: nounwind willreturn memory(read)
declare !lgc.create.opcode !3 float @lgc.create.read.generic.input.f32(...) local_unnamed_addr #1
; <4 x float> input read; declared here but not called by @main in this file.
; Function Attrs: nounwind willreturn memory(read)
declare !lgc.create.opcode !3 <4 x float> @lgc.create.read.generic.input.v4f32(...) local_unnamed_addr #1
; Output write; called once at the end of @main.
; Function Attrs: nodivergencesource nounwind
declare !lgc.create.opcode !4 void @lgc.create.write.generic.output(...) local_unnamed_addr #2
; Attribute groups referenced in this file:
;   #0 - attached to @main
;   #1 - attached to both lgc.create.read.generic.input declarations
;   #2 - attached to the lgc.create.write.generic.output declaration
attributes #0 = { alwaysinline nounwind "denormal-fp-math-f32"="preserve-sign" }
attributes #1 = { nounwind willreturn memory(read) }
attributes #2 = { nodivergencesource nounwind }
; Metadata nodes referenced in this file.
; !0: operand of !spirv.ExecutionModel on @main (4 is Fragment in the SPIR-V
;     execution-model enumeration).
!0 = !{i32 4}
; !1: operand of !lgc.shaderstage on @main.
;     NOTE(review): presumably the LGC stage number for the fragment stage --
;     confirm against the LGC shader-stage enumeration.
!1 = !{i32 6}
; !2: loop ID attached to the loop back-edge in @main. It carries no hints in
;     this input; the FileCheck patterns at the top of the file expect the tool
;     output to contain a loop ID with an unroll count of 11 and
;     llvm.loop.disable_nonforced.
!2 = distinct !{!2}
; !3: !lgc.create.opcode value on the read.generic.input declarations.
!3 = !{i32 74}
; !4: !lgc.create.opcode value on the write.generic.output declaration.
!4 = !{i32 77}