@@ -320,21 +320,27 @@ def test_forward(self):
         # Try with labels and PackedSeqParams. Only micro batch size 1 is supported in this mode.
         packed_seq_params = PackedSeqParams(
             qkv_format="thd",
-            cu_seqlens_q=[0, 512, 1024, 1600],  # Just example values.
-            cu_seqlens_kv=[0, 512, 1024, 1600],
-            max_seqlen_q=[1600],
-            max_seqlen_kv=[1600],
+            cu_seqlens_q=torch.tensor(
+                [0, 512, 1024, 1600], dtype=torch.int32
+            ).cuda(),  # Just example values.
+            cu_seqlens_kv=torch.tensor([0, 512, 1024, 1600], dtype=torch.int32).cuda(),
+            max_seqlen_q=torch.tensor(1600, dtype=torch.int32).cuda(),
+            max_seqlen_kv=torch.tensor(1600, dtype=torch.int32).cuda(),
         )

+        # NOTE: Packing is only supported with BF16. Use BF16 here and switch back to default.
+        self.model.to(torch.bfloat16)
         loss, new_loss_mask = self.model.forward(
-            img[:1],
+            img[:1].to(torch.bfloat16),
             input_ids[:1],
             position_ids[:1],
             attention_mask,
             labels[:1],
             loss_mask[:1],
             num_image_tiles=num_image_tiles[:1],
+            packed_seq_params=packed_seq_params,
         )
+        self.model.to(torch.float32)

         # 1600 = 577 (img_seq_len) + 1024 (text tokens in the first sample) - 1 (image token).
         assert loss.shape == new_loss_mask.shape == torch.Size((1, 1600))