Skip to content

Commit bc05593

Browse files
joevt authored and dingusdev committed
bench1: Changes to reduce variability.
For each of the 5 runs, the best (lowest) time out of 200 samples is chosen. The test buffer size is reduced from 8 MiB (0x800000 bytes) to 32 KiB (0x8000 bytes). Each run also outputs a performance figure in MiB/s.
1 parent 4a71dbe commit bc05593

File tree

1 file changed

+43
-27
lines changed

1 file changed

+43
-27
lines changed

benchmark/bench1.cpp

+43-27
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,12 @@ uint32_t cs_code[] = {
4040
};
4141

4242
// 0x7FFFFFFC is the max
43-
constexpr uint32_t test_size = 0x800000;
43+
constexpr uint32_t test_size = 0x8000;
44+
constexpr uint32_t test_samples = 200;
45+
constexpr uint32_t test_iterations = 5;
4446

4547
int main(int argc, char** argv) {
46-
int i;
48+
int i, j;
4749

4850
/* initialize logging */
4951
loguru::g_preamble_date = false;
@@ -73,20 +75,23 @@ int main(int argc, char** argv) {
7375

7476
srand(0xCAFEBABE);
7577

76-
LOG_F(INFO, "Test size is 0x%X", test_size);
78+
LOG_F(INFO, "Test size: 0x%X", test_size);
7779
LOG_F(INFO, "First few bytes:");
80+
bool did_lf = false;
7881
for (i = 0; i < test_size; i++) {
7982
uint8_t val = rand() % 256;
8083
mmu_write_vmem<uint8_t>(0x1000+i, val);
8184
if (i < 64) {
8285
printf("%02x", val);
83-
if (i % 32 == 31)
86+
did_lf = false;
87+
if (i % 32 == 31) {
8488
printf("\n");
89+
did_lf = true;
90+
}
8591
}
8692
}
87-
printf("\n");
88-
89-
power_on = true;
93+
if (!did_lf)
94+
printf("\n");
9095

9196
#if 0
9297
/* prepare benchmark code execution */
@@ -101,32 +106,43 @@ int main(int argc, char** argv) {
101106
ppc_state.gpr[3] = 0x1000; // buf
102107
ppc_state.gpr[4] = test_size; // len
103108
ppc_state.gpr[5] = 0; // sum
109+
power_on = true;
104110
ppc_exec_until(0xC4);
105111

106112
LOG_F(INFO, "Checksum: 0x%08X", ppc_state.gpr[3]);
113+
uint32_t checksum = ppc_state.gpr[3];
107114

108115
// run the clock once for cache fill etc.
109-
auto start_time = std::chrono::steady_clock::now();
110-
auto end_time = std::chrono::steady_clock::now();
111-
auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
112-
LOG_F(INFO, "Time elapsed (dry run): %lld ns", time_elapsed.count());
113-
114-
for (i = 0; i < 5; i++) {
115-
ppc_state.pc = 0;
116-
ppc_state.gpr[3] = 0x1000; // buf
117-
ppc_state.gpr[4] = test_size; // len
118-
ppc_state.gpr[5] = 0; // sum
119-
120-
auto start_time = std::chrono::steady_clock::now();
121-
122-
ppc_exec_until(0xC4);
123-
124-
auto end_time = std::chrono::steady_clock::now();
125-
126-
LOG_F(INFO, "Checksum: 0x%08X", ppc_state.gpr[3]);
127-
116+
uint64_t overhead = -1;
117+
for (j = 0; j < test_samples; j++) {
118+
auto start_time = std::chrono::steady_clock::now();
119+
auto end_time = std::chrono::steady_clock::now();
128120
auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
129-
LOG_F(INFO, "Time elapsed (run #%d): %lld ns", i, time_elapsed.count());
121+
if (time_elapsed.count() < overhead)
122+
overhead = time_elapsed.count();
123+
}
124+
LOG_F(INFO, "Overhead Time: %lld ns", overhead);
125+
126+
for (i = 0; i < test_iterations; i++) {
127+
uint64_t best_sample = -1;
128+
for (j = 0; j < test_samples; j++) {
129+
ppc_state.pc = 0;
130+
ppc_state.gpr[3] = 0x1000; // buf
131+
ppc_state.gpr[4] = test_size; // len
132+
ppc_state.gpr[5] = 0; // sum
133+
power_on = true;
134+
135+
auto start_time = std::chrono::steady_clock::now();
136+
ppc_exec_until(0xC4);
137+
auto end_time = std::chrono::steady_clock::now();
138+
auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
139+
if (time_elapsed.count() < best_sample)
140+
best_sample = time_elapsed.count();
141+
}
142+
if (ppc_state.gpr[3] != checksum)
143+
LOG_F(INFO, "Checksum: 0x%08X", ppc_state.gpr[3]);
144+
best_sample -= overhead;
145+
LOG_F(INFO, "(run #%d) Time: %lld ns Performance: %.4lf MiB/s", i, best_sample, 1E9 * test_size / (best_sample * 1024 * 1024));
130146
}
131147

132148
delete(grackle_obj);

0 commit comments

Comments
 (0)