diff --git a/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py b/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py
index 48ccc3d01832..ccc5c937e247 100644
--- a/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py
+++ b/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py
@@ -637,7 +637,7 @@ def test_generation(self):
         samples = self._load_datasamples(1)
 
         inputs = processor(
-            samples,
+            audio=samples,
         ).to(torch_device)
 
         out = model.generate(**inputs)
@@ -667,7 +667,7 @@ def test_generation_batched(self):
         samples = self._load_datasamples(4)
 
         inputs = processor(
-            samples,
+            audio=samples,
         ).to(torch_device)
 
         out = model.generate(**inputs)
diff --git a/tests/models/llava_onevision/test_processing_llava_onevision.py b/tests/models/llava_onevision/test_processing_llava_onevision.py
index b7842300b099..c369a0236c6d 100644
--- a/tests/models/llava_onevision/test_processing_llava_onevision.py
+++ b/tests/models/llava_onevision/test_processing_llava_onevision.py
@@ -59,13 +59,8 @@ def setUpClass(cls):
         except Exception:
             local_tiny_video = None
 
-        local_videos = [
-            os.path.join(repo_root, "Big_Buck_Bunny_720_10s_10MB.mp4"),
-            os.path.join(repo_root, "sample_demo_1.mp4"),
-        ]
         cls.local_tiny_video = local_tiny_video
         MODALITY_INPUT_DATA["images"] = [local_image, local_image]
-        MODALITY_INPUT_DATA["videos"] = local_videos
 
         # Force video decoding to use torchvision backend to avoid torchcodec dependency during tests
         video_processing_utils.is_torchcodec_available = lambda: False  # type: ignore
diff --git a/tests/models/longcat_flash/test_modeling_longcat_flash.py b/tests/models/longcat_flash/test_modeling_longcat_flash.py
index 0c916dac558e..8686c1fe6096 100644
--- a/tests/models/longcat_flash/test_modeling_longcat_flash.py
+++ b/tests/models/longcat_flash/test_modeling_longcat_flash.py
@@ -352,7 +352,7 @@ def test_shortcat_generation(self):
         ).to(self.model.device)
 
         with torch.no_grad():
-            outputs = self.model.generate(inputs, max_new_tokens=10, do_sample=False)
+            outputs = self.model.generate(inputs["input_ids"], max_new_tokens=10, do_sample=False)
         response = self.tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
 
         expected_output = "[Round 0] USER:Paris is... ASSISTANT: dig年车龄juanaheast稍achaotingupebarebones"
@@ -370,7 +370,7 @@ def test_longcat_generation_cpu(self):
         inputs = tokenizer.apply_chat_template(chat, tokenize=True, add_generation_prompt=True, return_tensors="pt")
 
         with torch.no_grad():
-            outputs = model.generate(inputs, max_new_tokens=3, do_sample=False)
+            outputs = model.generate(inputs["input_ids"], max_new_tokens=3, do_sample=False)
         response = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
 
         expected_output = "[Round 0] USER:Paris is... ASSISTANT:Paris is..."