@@ -40,10 +40,12 @@ uint32_t cs_code[] = {
40
40
};
41
41
42
42
// 0x7FFFFFFC is the max test size
43
- constexpr uint32_t test_size = 0x800000 ;
43
+ constexpr uint32_t test_size = 0x8000 ;
44
+ constexpr uint32_t test_samples = 200 ;
45
+ constexpr uint32_t test_iterations = 5 ;
44
46
45
47
int main (int argc, char ** argv) {
46
- int i;
48
+ int i, j ;
47
49
48
50
/* initialize logging */
49
51
loguru::g_preamble_date = false ;
@@ -73,20 +75,23 @@ int main(int argc, char** argv) {
73
75
74
76
srand (0xCAFEBABE );
75
77
76
- LOG_F (INFO, " Test size is 0x%X" , test_size);
78
+ LOG_F (INFO, " Test size: 0x%X" , test_size);
77
79
LOG_F (INFO, " First few bytes:" );
80
+ bool did_lf = false ;
78
81
for (i = 0 ; i < test_size; i++) {
79
82
uint8_t val = rand () % 256 ;
80
83
mmu_write_vmem<uint8_t >(0x1000 +i, val);
81
84
if (i < 64 ) {
82
85
printf (" %02x" , val);
83
- if (i % 32 == 31 )
86
+ did_lf = false ;
87
+ if (i % 32 == 31 ) {
84
88
printf (" \n " );
89
+ did_lf = true ;
90
+ }
85
91
}
86
92
}
87
- printf (" \n " );
88
-
89
- power_on = true ;
93
+ if (!did_lf)
94
+ printf (" \n " );
90
95
91
96
#if 0
92
97
/* prepare benchmark code execution */
@@ -101,32 +106,43 @@ int main(int argc, char** argv) {
101
106
ppc_state.gpr [3 ] = 0x1000 ; // buf
102
107
ppc_state.gpr [4 ] = test_size; // len
103
108
ppc_state.gpr [5 ] = 0 ; // sum
109
+ power_on = true ;
104
110
ppc_exec_until (0xC4 );
105
111
106
112
LOG_F (INFO, " Checksum: 0x%08X" , ppc_state.gpr [3 ]);
113
+ uint32_t checksum = ppc_state.gpr [3 ];
107
114
108
115
// time back-to-back clock reads, keeping the minimum, to estimate timer overhead; also serves as a dry run for cache fill etc.
109
- auto start_time = std::chrono::steady_clock::now ();
110
- auto end_time = std::chrono::steady_clock::now ();
111
- auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
112
- LOG_F (INFO, " Time elapsed (dry run): %lld ns" , time_elapsed.count ());
113
-
114
- for (i = 0 ; i < 5 ; i++) {
115
- ppc_state.pc = 0 ;
116
- ppc_state.gpr [3 ] = 0x1000 ; // buf
117
- ppc_state.gpr [4 ] = test_size; // len
118
- ppc_state.gpr [5 ] = 0 ; // sum
119
-
120
- auto start_time = std::chrono::steady_clock::now ();
121
-
122
- ppc_exec_until (0xC4 );
123
-
124
- auto end_time = std::chrono::steady_clock::now ();
125
-
126
- LOG_F (INFO, " Checksum: 0x%08X" , ppc_state.gpr [3 ]);
127
-
116
+ uint64_t overhead = -1 ;
117
+ for (j = 0 ; j < test_samples; j++) {
118
+ auto start_time = std::chrono::steady_clock::now ();
119
+ auto end_time = std::chrono::steady_clock::now ();
128
120
auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
129
- LOG_F (INFO, " Time elapsed (run #%d): %lld ns" , i, time_elapsed.count ());
121
+ if (time_elapsed.count () < overhead)
122
+ overhead = time_elapsed.count ();
123
+ }
124
+ LOG_F (INFO, " Overhead Time: %lld ns" , overhead);
125
+
126
+ for (i = 0 ; i < test_iterations; i++) {
127
+ uint64_t best_sample = -1 ;
128
+ for (j = 0 ; j < test_samples; j++) {
129
+ ppc_state.pc = 0 ;
130
+ ppc_state.gpr [3 ] = 0x1000 ; // buf
131
+ ppc_state.gpr [4 ] = test_size; // len
132
+ ppc_state.gpr [5 ] = 0 ; // sum
133
+ power_on = true ;
134
+
135
+ auto start_time = std::chrono::steady_clock::now ();
136
+ ppc_exec_until (0xC4 );
137
+ auto end_time = std::chrono::steady_clock::now ();
138
+ auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
139
+ if (time_elapsed.count () < best_sample)
140
+ best_sample = time_elapsed.count ();
141
+ }
142
+ if (ppc_state.gpr [3 ] != checksum)
143
+ LOG_F (INFO, " Checksum: 0x%08X" , ppc_state.gpr [3 ]);
144
+ best_sample -= overhead;
145
+ LOG_F (INFO, " (run #%d) Time: %lld ns Performance: %.4lf MiB/s" , i, best_sample, 1E9 * test_size / (best_sample * 1024 * 1024 ));
130
146
}
131
147
132
148
delete (grackle_obj);
0 commit comments