Skip to content

Commit e759c47

Browse files
generatedunixname499836121facebook-github-bot
generatedunixname499836121
authored and committed
Use cuBLAS default workspace size in Lt (#153556)
Summary: Also enables unified workspaces by default for non-FBCODE use cases. Default Lt workspace size is also updated to match cuBLAS logic for default, including for Blackwell (SM 10.0) and GeForce Blackwell (SM 12.0). Recommended defaults are documented here: https://docs.nvidia.com/cuda/cublas/#cublassetworkspace X-link: pytorch/pytorch#153556 Approved by: https://github.com/Skylion007, https://github.com/ngimel Reviewed By: izaitsevfb Differential Revision: D75387537 fbshipit-source-id: 7ec68ba7362c49a3cfc16fa7e46c0ba490a601c1
1 parent 4617d91 commit e759c47

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

userbenchmark/dynamo/dynamobench/common.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3609,16 +3609,15 @@ def run(runner, args, original_dir=None):
36093609
if args.devices == ["xpu"]:
36103610
torch.use_deterministic_algorithms(True, warn_only=True)
36113611
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
3612-
# TODO(eqy): revisit when cuBLASLt workspace size is bumped
3613-
# if args.only is not None and args.only in {
3614-
# "DebertaForQuestionAnswering",
3615-
# "RobertaForQuestionAnswering",
3616-
# "nvidia_deeprecommender",
3617-
# "volo_d1_224",
3618-
# }:
3619-
# # These seem unhappy with numerics of larger cuBLASLt workspace
3620-
# # sizes following #145130 (due to enabling split-k?)
3621-
# torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
3612+
if args.only is not None and args.only in {
3613+
"DebertaForQuestionAnswering",
3614+
"nvidia_deeprecommender",
3615+
"crossvit_9_240",
3616+
}:
3617+
# These seem unhappy with numerics of larger cuBLASLt workspace
3618+
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
3619+
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
3620+
36223621
torch.backends.cudnn.deterministic = True
36233622
torch.backends.cudnn.allow_tf32 = False
36243623
torch.backends.cudnn.benchmark = False

userbenchmark/dynamo/dynamobench/timm_models.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ def pip_install(package):
7171
}
7272

7373
REQUIRE_HIGHER_TOLERANCE = {
74+
"crossvit_9_240",
7475
"fbnetv3_b",
7576
"gmixer_24_224",
7677
"hrnet_w18",

0 commit comments

Comments (0)