@@ -116,27 +116,25 @@ proc runThreads*(threads: SubgroupThreads; workGroup: WorkGroupContext,
116
116
(threadStates[threadId] == atSubBarrier and canReconverge) or canPassBarrier):
117
117
var found = false
118
118
for groupIdx in 0..<numGroups:
119
- let firstThreadId = threadGroups[groupIdx][ 0]
119
+ let firstThreadId = threadGroups[groupIdx][ 1] # First thread is at index 1
120
120
if commands[firstThreadId].id == commands[threadId].id:
121
- # Find first empty slot in group
122
- for slot in 0 ..< SubgroupSize:
123
- if threadGroups[groupIdx][slot] == InvalidId:
124
- threadGroups[groupIdx][slot] = threadId
125
- threadGroups[groupIdx][slot + 1 ] = InvalidId
126
- break
121
+ let currentLen = threadGroups[groupIdx][0 ]
122
+ threadGroups[groupIdx][currentLen + 1 ] = threadId
123
+ threadGroups[groupIdx][0 ] = currentLen + 1
127
124
found = true
128
125
break
129
126
if not found:
130
- threadGroups[numGroups][0 ] = threadId
131
- threadGroups[numGroups][1 ] = InvalidId
127
+ # Create new group
128
+ threadGroups[numGroups][0 ] = 1 # Length is 1
129
+ threadGroups[numGroups][1 ] = threadId
132
130
inc numGroups
133
131
134
132
template execSubgroupOp(op: untyped ) =
135
133
op(results, commands, threadGroups[groupIdx], firstThreadId, opId, showDebugOutput)
136
134
137
135
# Process operation groups
138
136
for groupIdx in 0 ..< numGroups:
139
- let firstThreadId = threadGroups[groupIdx][0 ]
137
+ let firstThreadId = threadGroups[groupIdx][1 ]
140
138
let opKind = commands[firstThreadId].kind
141
139
let opId = commands[firstThreadId].id
142
140
case opKind:
0 commit comments