Pad the audio embedding inside the model for HuMo, for more flexibility. (#9935)
This commit is contained in:
@@ -1551,6 +1551,9 @@ class HumoWanModel(WanModel):
        context_img_len = None

        if audio_embed is not None:
            if reference_latent is not None:
                zero_audio_pad = torch.zeros(audio_embed.shape[0], reference_latent.shape[-3], *audio_embed.shape[2:], device=audio_embed.device, dtype=audio_embed.dtype)
                audio_embed = torch.cat([audio_embed, zero_audio_pad], dim=1)
            audio = self.audio_proj(audio_embed).permute(0, 3, 1, 2).flatten(2).transpose(1, 2)
        else:
            audio = None
Reference in New Issue
Block a user