Commit 2692daa

infra: Remove the WAR that left test item IDs incomplete (#3313)
* Remove the WAR for test items incompleted
* Complete test item manually
* Fix another test definition file
* Complete test name
* Fix some other test names
* Fix another test name after rebase
* Update name for waived case name, too
* Fix name for multi-gpu tests
* Fix test name after rebase
* Fix another test name
* Fix typo
* Fix test name after rebase
* Fix other qa tests
* Fix tests name after rebase
* Fix name after rebase
* Correct test names in waive.txt
* Add new test_durations file
* Fix names after rebase
* Update test duration to latest

---------
Signed-off-by: EmmaQiaoCh <[email protected]>
Signed-off-by: qqiao <[email protected]>
1 parent 403370a commit 2692daa

File tree: 11 files changed (+524 -552 lines)

tests/integration/defs/.test_durations (+386 -398; large diff not rendered)

tests/integration/defs/conftest.py (-16)

@@ -1971,22 +1971,6 @@ def get_device_memory():
     return memory


-#
-# When test parameters have an empty id, older versions of pytest ignored that parameter when generating the
-# test node's ID completely. This however was actually a bug, and not expected behavior that got fixed in newer
-# versions of pytest: https://github.com/pytest-dev/pytest/pull/6607. TRT test defs however rely on this behavior
-# for quite a few test names. This is a hacky WAR that restores the old behavior back so that the
-# test names do not change. Note: This might break in a future pytest version.
-#
-# TODO: Remove this hack once the test names are fixed.
-#
-
-from _pytest.python import CallSpec2
-
-CallSpec2.id = property(
-    lambda self: "-".join(map(str, filter(None, self._idlist))))
-
-
 def pytest_addoption(parser):
     parser.addoption(
         "--test-list",
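The removed WAR patched `CallSpec2.id` to drop empty parameter ids before joining, which is why the old node IDs lacked the extra dashes that current pytest emits. A minimal standalone sketch of the difference (the `idlist` value here is a hypothetical example; the real list comes from pytest internals):

```python
# One parameter with a real id, two parameters whose ids are empty strings.
idlist = ["use_py_session", "", ""]

# Old WAR behavior: filter(None, ...) drops the empty ids before joining.
war_id = "-".join(map(str, filter(None, idlist)))

# Current pytest behavior (kept by this commit): empty ids remain as empty
# components, so the joined ID contains consecutive dashes.
pytest_id = "-".join(map(str, idlist))

print(war_id)     # use_py_session
print(pytest_id)  # use_py_session--
```

This is exactly the rename pattern visible in the test lists below, e.g. `test_llama_e2e[use_py_session]` becoming `test_llama_e2e[use_py_session--]`.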

tests/integration/test_lists/qa/examples_test_list.txt (+14 -14)

@@ -424,18 +424,18 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_fp8_tp2
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_nvfp4_tp2
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[]
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[-]
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[]
 accuracy/test_llm_api_pytorch.py::TestMinitron4BBaseInstruct::test_fp8_prequantized
 accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype_tp8
 accuracy/test_llm_api_pytorch.py::TestQwen2_7BInstruct::test_auto_dtype
-accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8-cuda_graph-overlap_scheduler]
-accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8-mtp_nextn=2-cuda_graph-overlap_scheduler]
-accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep4-cuda_graph-overlap_scheduler]
-accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep4-mtp_nextn=2-cuda_graph-overlap_scheduler]
-accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep8-cuda_graph-overlap_scheduler]
-accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep8-mtp_nextn=2-cuda_graph-overlap_scheduler]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8---cuda_graph-overlap_scheduler]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8-mtp_nextn=2--cuda_graph-overlap_scheduler]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep4---cuda_graph-overlap_scheduler]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep4-mtp_nextn=2--cuda_graph-overlap_scheduler]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep8---cuda_graph-overlap_scheduler]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[tp8ep8-mtp_nextn=2--cuda_graph-overlap_scheduler]

 test_e2e.py::test_benchmark_sanity[bert_base] # 127.18s
 test_e2e.py::test_benchmark_sanity[gpt_350m] # 64.06s
@@ -445,14 +445,14 @@ test_e2e.py::test_benchmark_sanity[roberta_base]
 test_e2e.py::test_benchmark_sanity[t5_base]
 test_e2e.py::test_benchmark_sanity_enable_fp8[gpt_350m]
 test_e2e.py::test_benchmark_sanity_enable_fp8[llama_7b]
-test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding]
-test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding]
-test_e2e.py::test_llama_e2e[use_py_session]
+test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]
+test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-]
+test_e2e.py::test_llama_e2e[use_py_session--]
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf] # 5min
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf] # 5min
-test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
-test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding]
-test_e2e.py::test_mistral_e2e[use_py_session]
+test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding--]
+test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding--]
+test_e2e.py::test_mistral_e2e[use_py_session---]
 test_e2e.py::test_openai_multi_chat_example
 test_e2e.py::test_openai_consistent_chat
 test_e2e.py::test_llmapi_server_example
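The renamed entries above encode each empty parameter id as a bare dash inside the brackets. A hypothetical alternative fix (not what this commit does; it updates the recorded names instead) would be to give every parametrized case an explicit non-empty id, so no empty ID components exist in the first place. The `padding_params` list and its ids here are invented for illustration:

```python
import pytest

# Each pytest.param carries an explicit id; an empty string id ("") is what
# newer pytest renders as an empty component (a bare "-") in the node ID.
padding_params = [
    pytest.param(True, id="remove_input_padding"),
    pytest.param(False, id=""),  # would appear as an empty ID component
]

# pytest.param returns a ParameterSet whose .id field exposes the chosen id.
ids = [p.id for p in padding_params]
print(ids)  # ['remove_input_padding', '']
```

Replacing the `""` with a descriptive id such as `"keep_input_padding"` would keep node IDs stable across pytest versions without any WAR.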

tests/integration/test_lists/qa/llm_sanity_test.txt (+8 -8)

@@ -91,14 +91,14 @@ examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-re
 examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8]
 examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8]
 examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_cpp_runtime]
-test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding]
-test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding]
-test_e2e.py::test_llama_e2e[use_py_session]
+test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]
+test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-]
+test_e2e.py::test_llama_e2e[use_py_session--]
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf] # 5min
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf] # 5min
-test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
-test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding]
-test_e2e.py::test_mistral_e2e[use_py_session]
+test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding--]
+test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding--]
+test_e2e.py::test_mistral_e2e[use_py_session---]
 test_e2e.py::test_openai_multi_chat_example
 test_e2e.py::test_openai_consistent_chat

@@ -130,8 +130,8 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_fp8_tp2
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_nvfp4_tp2
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[]
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[-]
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[]

 # Pivot to Pytorch test cases.

tests/integration/test_lists/test-db/l0_a10.yml (+10 -10)

@@ -34,11 +34,11 @@ l0_a10:
       backend: cpp
   tests:
   # ------------- CPP tests ---------------
-  - test_cpp.py::test_model[medusa-86]
-  - test_cpp.py::test_model[redrafter-86]
-  - test_cpp.py::test_model[mamba-86]
-  - test_cpp.py::test_model[recurrentgemma-86]
-  - test_cpp.py::test_model[eagle-86]
+  - test_cpp.py::test_model[-medusa-86]
+  - test_cpp.py::test_model[-redrafter-86]
+  - test_cpp.py::test_model[-mamba-86]
+  - test_cpp.py::test_model[-recurrentgemma-86]
+  - test_cpp.py::test_model[-eagle-86]
 - condition:
     ranges:
       system_gpu_count:
@@ -75,9 +75,9 @@ l0_a10:
   - test_e2e.py::test_openai_completions_example
   - test_e2e.py::test_openai_chat_example
   - test_e2e.py::test_openai_chat_multimodal_example
-  - test_e2e.py::test_trtllm_bench_sanity[non-streaming-FP16-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]
+  - test_e2e.py::test_trtllm_bench_sanity[--non-streaming-FP16-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]
   - test_e2e.py::test_trtllm_bench_latency_sanity[FP16-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]
-  - test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency]
+  - test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-]
   - unittest/trt/quantization # 18 mins
   - accuracy/test_cli_flow.py::TestLlama7B::test_streamingllm # 2 mins
   - unittest/trt/functional # 37 mins
@@ -127,9 +127,9 @@ l0_a10:
       stage: post_merge
       backend: tensorrt
   tests:
-  - test_e2e.py::test_mistral_e2e[use_py_session]
-  - test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
-  - test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding]
+  - test_e2e.py::test_mistral_e2e[use_py_session---]
+  - test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding--]
+  - test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding--]
   - examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-BertModel-bert/bert-base-uncased]
   - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaModel-bert/roberta-base]
   - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]

tests/integration/test_lists/test-db/l0_a30.yml (+4 -4)

@@ -47,9 +47,9 @@ l0_a30:
   - test_cpp.py::test_unit_tests[runtime-80]
   - test_cpp.py::test_unit_tests[thop-80]
   - test_cpp.py::test_unit_tests[utils-80]
-  - test_cpp.py::test_model[gpt-80]
-  - test_cpp.py::test_model[gpt_executor-80]
-  - test_cpp.py::test_model[gpt_tests-80]
+  - test_cpp.py::test_model[-gpt-80]
+  - test_cpp.py::test_model[-gpt_executor-80]
+  - test_cpp.py::test_model[-gpt_tests-80]
 - condition:
     ranges:
       system_gpu_count:
@@ -163,5 +163,5 @@ l0_a30:
       stage: post_merge
       backend: cpp
   tests:
-  - test_cpp.py::test_model[gpt_session-80]
+  - test_cpp.py::test_model[-gpt_session-80]
   - test_cpp.py::test_benchmarks[gpt-80]

tests/integration/test_lists/test-db/l0_b200.yml (+6 -6)

@@ -15,12 +15,12 @@ l0_b200:
   tests:
   # ------------- PyTorch tests ---------------
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[attention_dp]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[cuda_graph]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[overlap_scheduler]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[attention_dp-cuda_graph-overlap_scheduler]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-attention_dp]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-cuda_graph]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-overlap_scheduler]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-attention_dp-cuda_graph-overlap_scheduler]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-cuda_graph]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-overlap_scheduler]
