Skip to content

Commit b771efd

Browse files
committed
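Simplify logic by adding an extra sentinel slot to SubgroupThreadIDs, so the InvalidId terminator can always be written without a bounds check.
This adds SubgroupSize*4 bytes of overhead, though (one extra uint32 per thread group).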
1 parent b55f238 commit b771efd

File tree

4 files changed

+11
-10
lines changed

4 files changed

+11
-10
lines changed

src/computesim.nim

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ proc workGroupProc[A, B, C](
160160
workgroupID: UVec3,
161161
wg: WorkGroupContext,
162162
compute: ThreadGenerator[A, B, C],
163-
ssbo: A, smem: ptr B, args: C) {.nimcall.} =
163+
ssbo: A, smem: ptr B, args: C) =
164164
# Auxiliary proc for work group management
165165
var wg = wg # Shadow for modification
166166
wg.gl_WorkGroupID = workgroupID

src/computesim/core.nim

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,15 @@ type
132132
thread: ThreadContext, threadId: uint32): SubgroupCommand
133133
SubgroupResults* = array[SubgroupSize, SubgroupResult]
134134
SubgroupCommands* = array[SubgroupSize, SubgroupCommand]
135-
SubgroupThreadIDs* = array[SubgroupSize, uint32]
135+
SubgroupThreadIDs* = array[SubgroupSize + 1, uint32]
136136
SubgroupThreads* = array[SubgroupSize, ThreadClosure]
137137
ThreadContexts* = array[SubgroupSize, ThreadContext]
138138
139139
const
140140
InvalidId* = high(uint32) # Sentinel value for empty/invalid
141141
142142
iterator threadsInGroup*(group: SubgroupThreadIDs): uint32 =
143-
for member in group.items:
144-
if member == InvalidId: break
145-
yield member
143+
var idx: uint32 = 0
144+
while group[idx] != InvalidId:
145+
yield group[idx]
146+
inc idx

src/computesim/lockstep.nim

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,14 @@ proc runThreads*(threads: SubgroupThreads; workGroup: WorkGroupContext,
3737
var
3838
anyThreadsActive = true
3939
allThreadsHalted = false
40+
threadGroups {.noinit.}: array[SubgroupSize, SubgroupThreadIDs]
4041
threadStates {.noinit.}: array[SubgroupSize, ThreadState]
4142
commands {.noinit.}: SubgroupCommands
4243
results {.noinit.}: SubgroupResults
4344
minReconvergeId: uint32 = 0
4445
barrierId = InvalidId
4546
barrierThreadCount: uint32 = 0
47+
numGroups: uint32 = 0
4648

4749
shouldShowDebugOutput(showDebugOutput)
4850
threadStates.fill(running)
@@ -105,9 +107,7 @@ proc runThreads*(threads: SubgroupThreads; workGroup: WorkGroupContext,
105107
barrierThreadCount, numActiveThreads)
106108
107109
# Group matching operations
108-
var
109-
threadGroups {.noinit.}: array[SubgroupSize, SubgroupThreadIDs]
110-
numGroups: uint32 = 0
110+
numGroups = 0
111111
112112
# Group by operation id
113113
for threadId in 0..<numActiveThreads:
@@ -122,8 +122,7 @@ proc runThreads*(threads: SubgroupThreads; workGroup: WorkGroupContext,
122122
for slot in 0..<SubgroupSize:
123123
if threadGroups[groupIdx][slot] == InvalidId:
124124
threadGroups[groupIdx][slot] = threadId
125-
if slot + 1 < SubgroupSize:
126-
threadGroups[groupIdx][slot + 1] = InvalidId
125+
threadGroups[groupIdx][slot + 1] = InvalidId
127126
break
128127
found = true
129128
break

tests/config.nims

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
--define:"release"
66
--define:"useMalloc"
77
--define:"ThreadPoolSize=10"
8+
# --define:"SubgroupSize=32"
89
--define:debugSubgroup
910
switch("define", "debugSubgroupID:1")
1011

0 commit comments

Comments
 (0)