Skip to content

Commit 428469f

Browse files
committed
Squashed commit of the following:
commit a7d8bd5 Author: Ammar ELWazir <[email protected]> Date: Wed Nov 8 16:33:01 2023 +0000 SWDEV-430956: resolving the issue with V1 script reading long kernel names Change-Id: I30069e28dbcc8edf2b4b41a8afd6ca4014427da7 commit dbfe955 Author: Ammar ELWazir <[email protected]> Date: Mon Oct 30 16:53:21 2023 +0000 Fixing initialization of ROCTx for the tool in the default constructor Change-Id: I12a3bbde4420a9eb8d734c546169a11cb23237dd commit c46a118 Author: Ammar ELWazir <[email protected]> Date: Fri Oct 27 00:51:55 2023 +0000 Updating Golden Traces Change-Id: I1234bcbe7eff8e53855e23e246d821102672006d commit 7e8348b Author: Ammar ELWazir <[email protected]> Date: Wed Oct 25 13:51:58 2023 +0000 Adding Kernel Names instead Kernel Execution Change-Id: I102a8e741eeee18d240a62ff33e9b94e8f57d5ff commit d4c2546 Author: gobhardw <[email protected]> Date: Wed Oct 25 20:02:27 2023 +0530 SWDEV-408936 solving rocprof hang due to early initialization Change-Id: Ia9c52f3b4c7ff191054cef40208f26b28f021378 commit 54777ec Author: Benjamin Welton <[email protected]> Date: Thu Oct 19 15:26:15 2023 +0000 Replace direct file out with stringstream Change-Id: Id0a00d918df88d0031f9c8f847e65b416c09bf77 commit 38ba630 Author: Benjamin Welton <[email protected]> Date: Wed Oct 18 05:00:56 2023 +0000 Added ROCTRACER_BUFFER_SIZE to set buffer size [SWDEV-418917] reported that timing skew was being introduced by roctracer. Most of the cause of this problem seems to stem from outrunning the double buffering scheme that we use in memory_pool (part of the reason for this outrun is due to File writing being slow). A semi-quick fix that may be able to last until RocProf v2 is complete is to allow adjustment of the buffer size. ROCTRACER_BUFFER_SIZE env variable was introduced here which allows setting the buffer size of tracer tool. By increasing the buffer size, an ~8% reduction in execution time when timing on the program side. This should also reduce the frequency of large delays when we outrun the buffer. Note: increasing this size dramatically can cause slow startups (i.e. above 50MB). Change-Id: I98c4316cfe93a043623ae2669cfe1a5abb55c990 Change-Id: I1471f6d3bc3505c967da6443552213e80c5573a7
1 parent 4ff8075 commit 428469f

10 files changed

+10748
-8560
lines changed

plugin/file/file.cpp

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ class file_plugin_t {
222222
return HSA_STATUS_ERROR;
223223

224224
*file << std::hex << std::showbase << agent.handle << " agent "
225-
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << std::endl;
225+
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << "\n";
226226
return HSA_STATUS_SUCCESS;
227227
},
228228
&hsa_handles);
@@ -237,7 +237,7 @@ class file_plugin_t {
237237

238238
roctracer_timestamp_t app_begin_timestamp;
239239
CHECK_ROCTRACER(roctracer_get_timestamp(&app_begin_timestamp));
240-
begin_ts << std::dec << app_begin_timestamp << std::endl;
240+
begin_ts << std::dec << app_begin_timestamp << "\n";
241241
if (begin_ts.fail()) {
242242
warning("Cannot write to '%s'", begin_ts.name().c_str());
243243
return;
@@ -247,25 +247,28 @@ class file_plugin_t {
247247
}
248248

249249
int write_callback_record(const roctracer_record_t* record, const void* callback_data) {
250+
std::stringstream ss;
250251
output_file_t* output_file{nullptr};
251252
switch (record->domain) {
252253
case ACTIVITY_DOMAIN_ROCTX: {
253254
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
254255
output_file = get_output_file(ACTIVITY_DOMAIN_ROCTX);
255-
*output_file << std::dec << record->begin_ns << " " << record->process_id << ":"
256-
<< record->thread_id << " " << record->op << ":" << data->args.id << ":\""
257-
<< (data->args.message ? data->args.message : "") << "\"" << std::endl;
256+
ss << std::dec << record->begin_ns << " " << record->process_id << ":" << record->thread_id
257+
<< " " << record->op << ":" << data->args.id << ":\""
258+
<< (data->args.message ? data->args.message : "") << "\""
259+
<< "\n";
260+
*output_file << ss.str();
258261
break;
259262
}
260263
case ACTIVITY_DOMAIN_HSA_API: {
261264
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
262265
output_file = get_output_file(ACTIVITY_DOMAIN_HSA_API);
263-
*output_file << std::dec << record->begin_ns << ":"
264-
<< ((record->op == HSA_API_ID_hsa_shut_down) ? record->begin_ns
265-
: record->end_ns)
266-
<< " " << record->process_id << ":" << record->thread_id << " "
267-
<< hsa_api_data_pair_t(record->op, *data) << " :" << std::dec
268-
<< data->correlation_id << std::endl;
266+
ss << std::dec << record->begin_ns << ":"
267+
<< ((record->op == HSA_API_ID_hsa_shut_down) ? record->begin_ns : record->end_ns) << " "
268+
<< record->process_id << ":" << record->thread_id << " "
269+
<< hsa_api_data_pair_t(record->op, *data) << " :" << std::dec << data->correlation_id
270+
<< "\n";
271+
*output_file << ss.str();
269272
break;
270273
}
271274
case ACTIVITY_DOMAIN_HIP_API: {
@@ -283,10 +286,10 @@ class file_plugin_t {
283286
}
284287

285288
output_file = get_output_file(ACTIVITY_DOMAIN_HIP_API);
286-
*output_file << std::dec << record->begin_ns << ":" << record->end_ns << " "
287-
<< record->process_id << ":" << record->thread_id << " "
288-
<< hipApiString((hip_api_id_t)record->op, data) << kernel_name << " :"
289-
<< std::dec << data->correlation_id << std::endl;
289+
ss << std::dec << record->begin_ns << ":" << record->end_ns << " " << record->process_id
290+
<< ":" << record->thread_id << " " << hipApiString((hip_api_id_t)record->op, data)
291+
<< kernel_name << " :" << std::dec << data->correlation_id << "\n";
292+
*output_file << ss.str();
290293
break;
291294
}
292295
default:
@@ -299,6 +302,7 @@ class file_plugin_t {
299302

300303
int write_activity_records(const roctracer_record_t* begin, const roctracer_record_t* end) {
301304
while (begin != end) {
305+
std::stringstream ss;
302306
output_file_t* output_file{nullptr};
303307
const char* name = roctracer_op_string(begin->domain, begin->op, begin->kind);
304308

@@ -311,21 +315,26 @@ class file_plugin_t {
311315
if (begin->correlation_id == 0) break;
312316

313317
output_file = get_output_file(ACTIVITY_DOMAIN_HIP_OPS);
314-
*output_file << std::dec << begin->begin_ns << ":" << begin->end_ns << " "
315-
<< begin->device_id << ":" << begin->queue_id << " " << name << ":"
316-
<< begin->correlation_id << ":" << GetPid() << std::endl;
318+
ss << std::dec << begin->begin_ns << ":" << begin->end_ns << " " << begin->device_id
319+
<< ":" << begin->queue_id << " "
320+
<< ((begin->op == HIP_OP_ID_DISPATCH && begin->kernel_name != nullptr)
321+
? truncate_name(cxx_demangle(begin->kernel_name))
322+
: name)
323+
<< ":" << begin->correlation_id << ":" << GetPid() << "\n";
324+
*output_file << ss.str();
317325
break;
318326
}
319327
case ACTIVITY_DOMAIN_HSA_OPS:
320328
output_file = get_output_file(ACTIVITY_DOMAIN_HSA_OPS, begin->op);
321329
if (begin->op == HSA_OP_ID_COPY) {
322-
*output_file << std::dec << begin->begin_ns << ":" << begin->end_ns
323-
<< " async-copy:" << begin->correlation_id << ":" << GetPid() << std::endl;
330+
ss << std::dec << begin->begin_ns << ":" << begin->end_ns
331+
<< " async-copy:" << begin->correlation_id << ":" << GetPid() << "\n";
332+
*output_file << ss.str();
324333
break;
325334
} else if (begin->op == HSA_OP_ID_RESERVED1) {
326-
*output_file << std::dec << begin->pc_sample.se << " " << begin->pc_sample.cycle << " "
327-
<< std::hex << std::showbase << begin->pc_sample.pc << " " << name
328-
<< std::endl;
335+
ss << std::dec << begin->pc_sample.se << " " << begin->pc_sample.cycle << " "
336+
<< std::hex << std::showbase << begin->pc_sample.pc << " " << name << "\n";
337+
*output_file << ss.str();
329338
break;
330339
}
331340
[[fallthrough]];

src/tracer_tool/tracer_tool.cpp

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ uint32_t GetTid() {
9494
return tid;
9595
}
9696

97+
size_t GetBufferSize() {
98+
auto bufSize = getenv("ROCTRACER_BUFFER_SIZE");
99+
// Default size if not set
100+
if (!bufSize) return 0x200000;
101+
return std::stoll({bufSize});
102+
}
103+
97104
// Tracing control thread
98105
uint32_t control_delay_us = 0;
99106
uint32_t control_len_us = 0;
@@ -217,7 +224,7 @@ struct roctx_trace_entry_t {
217224
};
218225

219226
roctracer::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer(
220-
"rocTX API", 0x200000, [](roctx_trace_entry_t* entry) {
227+
"rocTX API", GetBufferSize(), [](roctx_trace_entry_t* entry) {
221228
assert(plugin && "plugin is not initialized");
222229
plugin->write_callback_record(&entry->record, &entry->data);
223230
});
@@ -258,7 +265,7 @@ struct hsa_api_trace_entry_t {
258265
};
259266

260267
roctracer::TraceBuffer<hsa_api_trace_entry_t> hsa_api_trace_buffer(
261-
"HSA API", 0x200000, [](hsa_api_trace_entry_t* entry) {
268+
"HSA API", GetBufferSize(), [](hsa_api_trace_entry_t* entry) {
262269
assert(plugin && "plugin is not initialized");
263270
plugin->write_callback_record(&entry->record, &entry->data);
264271
});
@@ -397,7 +404,7 @@ static std::optional<std::string> getKernelName(uint32_t cid, const hip_api_data
397404
}
398405

399406
roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer(
400-
"HIP API", 0x200000, [](hip_api_trace_entry_t* entry) {
407+
"HIP API", GetBufferSize(), [](hip_api_trace_entry_t* entry) {
401408
assert(plugin && "plugin is not initialized");
402409
plugin->write_callback_record(&entry->record, &entry->data);
403410
});
@@ -481,7 +488,7 @@ int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const
481488
void open_tracing_pool() {
482489
if (roctracer_default_pool() == nullptr) {
483490
roctracer_properties_t properties{};
484-
properties.buffer_size = 0x80000;
491+
properties.buffer_size = GetBufferSize();
485492
properties.buffer_callback_fun = [](const char* begin, const char* end, void* /* arg */) {
486493
assert(plugin && "plugin is not initialized");
487494
plugin->write_activity_records(reinterpret_cast<const roctracer_record_t*>(begin),
@@ -546,14 +553,6 @@ void tool_load() {
546553
if (is_loaded == true) return;
547554
is_loaded = true;
548555

549-
// Load output plugin
550-
const char* plugin_name = getenv("ROCTRACER_PLUGIN_LIB");
551-
if (plugin_name == nullptr) plugin_name = "libfile_plugin.so";
552-
if (Dl_info dl_info; dladdr((void*)tool_load, &dl_info) != 0) {
553-
if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid())
554-
plugin.reset();
555-
}
556-
557556
// API traces switches
558557
const char* trace_domain = getenv("ROCTRACER_DOMAIN");
559558
if (trace_domain != nullptr) {
@@ -693,6 +692,14 @@ ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
693692
return true;
694693
}
695694

695+
// Load output plugin
696+
const char* plugin_name = getenv("ROCTRACER_PLUGIN_LIB");
697+
if (plugin_name == nullptr) plugin_name = "libfile_plugin.so";
698+
if (Dl_info dl_info; dladdr((void*)tool_load, &dl_info) != 0) {
699+
if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid())
700+
plugin.reset();
701+
}
702+
696703
tool_load();
697704

698705
// OnUnload may not be called if the ROC runtime is not shutdown by the client

test/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ set(CMAKE_EXECUTABLE_RPATH_LINK_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_CXX_F
2929

3030
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ROCM_PATH}/lib/cmake/hip")
3131
set(CMAKE_HIP_ARCHITECTURES OFF)
32+
if(DEFINED ROCM_PATH)
33+
set(HIP_ROOT_DIR "${ROCM_PATH}/bin")
34+
endif()
3235
find_package(HIP REQUIRED MODULE)
3336

3437
find_package(Clang REQUIRED CONFIG

0 commit comments

Comments
 (0)