Pad the audio embedding inside the Humo model for more flexibility. (#9935)

This commit is contained in:
comfyanonymous
2025-09-18 16:54:16 -07:00
committed by GitHub
parent 1ea8c54064
commit 24b0fce099
2 changed files with 3 additions and 4 deletions

View File

@@ -1551,6 +1551,9 @@ class HumoWanModel(WanModel):
context_img_len = None
if audio_embed is not None:
if reference_latent is not None:
zero_audio_pad = torch.zeros(audio_embed.shape[0], reference_latent.shape[-3], *audio_embed.shape[2:], device=audio_embed.device, dtype=audio_embed.dtype)
audio_embed = torch.cat([audio_embed, zero_audio_pad], dim=1)
audio = self.audio_proj(audio_embed).permute(0, 3, 1, 2).flatten(2).transpose(1, 2)
else:
audio = None