File tree
118 files changed
+123
-401
lines changed- tests/functional_tests/test_cases
- bert
- bert_mr_mcore_tp2_pp2_dgx_a100_1N8G
- bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G
- bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G
- bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G
- bert_mr_tp1_pp4_vp2_dgx_a100_1N8G
- bert_mr_tp1_pp4_vp2_resume_torch_dgx_a100_1N8G
- bert_mr_tp2_pp2_dgx_a100_1N8G
- bert_mr_tp2_pp2_resume_torch_dgx_a100_1N8G
- bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2
- bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2
- bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1
- gpt
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch
- gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G
- gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G
- gpt3_mr_tp1_pp4_vp1_resume_torch_dgx_a100_1N8G
- moe
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts
- gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G
- multimodal-llava
- multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G
- multimodal_llava_mr_mcore_te_tp4_pp1_freeze_vit_freeze_lm_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp4_pp1_freeze_vit_freeze_lm_dist_opt_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp4_pp1_resume_torch_etp3_dgx_a100_1N7G
- t5
- t5_220m_mr_mcore_te_tp2_pp2_dgx_a100_1N8G
- t5_220m_mr_mcore_te_tp2_pp2_resume_torch_dgx_a100_1N8G
- t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G
- t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G
- t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G
- t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G
- t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G
- t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G
- t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch
- t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1
- t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel
- t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1
- t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch
- t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
118 files changed
+123
-401
lines changed+6-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
69 | 69 |
| |
70 | 70 |
| |
71 | 71 |
| |
72 |
| - | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
73 | 75 |
| |
74 | 76 |
| |
75 | 77 |
| |
| |||
83 | 85 |
| |
84 | 86 |
| |
85 | 87 |
| |
86 |
| - | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
87 | 91 |
| |
88 | 92 |
| |
89 | 93 |
| |
|
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
0 commit comments