File tree
130 files changed
+176
-502
lines changed- tests/functional_tests/test_cases
- bert
- bert_mr_mcore_tp2_pp2_dgx_a100_1N8G
- bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G
- bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G
- bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G
- bert_mr_tp1_pp4_vp2_dgx_a100_1N8G
- bert_mr_tp1_pp4_vp2_resume_torch_dgx_a100_1N8G
- bert_mr_tp2_pp2_dgx_a100_1N8G
- bert_mr_tp2_pp2_resume_torch_dgx_a100_1N8G
- bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2
- bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2
- bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1
- bert_nightly_dgx_a100_1N8G_tp1_pp2
- bert_nightly_dgx_a100_1N8G_tp4_pp1
- gpt
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch
- gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G
- gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G
- gpt3_mr_tp1_pp4_vp1_resume_torch_dgx_a100_1N8G
- moe
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts
- gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G
- multimodal-llava
- multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G
- multimodal_llava_mr_mcore_te_tp4_pp1_freeze_vit_freeze_lm_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp4_pp1_freeze_vit_freeze_lm_dist_opt_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp4_pp1_resume_torch_etp3_dgx_a100_1N7G
- t5
- t5_220m_mr_mcore_te_tp2_pp2_dgx_a100_1N8G
- t5_220m_mr_mcore_te_tp2_pp2_resume_torch_dgx_a100_1N8G
- t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G
- t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G
- t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G
- t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G
- t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G
- t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G
- t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch
- t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1
- t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel
- t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1
- t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch
- t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
130 files changed
+176
-502
lines changedOriginal file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 |
| - | |
| 1 | + |
0 commit comments