Fix LoRA Trainer bugs with FP8 models. (#9854)

* Fix adapter weight init

* Fix fp8 model training

* Avoid inference tensor
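Note on the last bullet: tensors created under torch.inference_mode() become inference tensors, which autograd later rejects ("Inference tensors cannot be saved for backward"), so a trainer that captures frozen weights inside inference mode cannot build adapter graphs on top of them. A minimal sketch of the failure and the usual clone() workaround; the module and shapes are illustrative and not taken from this commit:

import torch

linear = torch.nn.Linear(4, 4)

with torch.inference_mode():
    # Anything produced here is an "inference tensor".
    captured = linear.weight + 0.0

lora_down = torch.nn.Parameter(torch.zeros(4, 2))

try:
    # Using the inference tensor in a graph that needs gradients typically
    # fails with "Inference tensors cannot be saved for backward".
    (captured @ lora_down).sum().backward()
except RuntimeError as err:
    print("inference tensor rejected:", err)

# Cloning outside inference mode yields a normal tensor that autograd accepts.
(captured.clone() @ lora_down).sum().backward()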
Author: Kohaku-Blueleaf
Date: 2025-09-21 09:24:48 +08:00
Committed by: GitHub
Parent: 9ed3c5cc09
Commit: 7be2b49b6b
6 changed files with 34 additions and 15 deletions


@@ -89,8 +89,8 @@ class LoKrAdapter(WeightAdapterBase):
         in_dim = weight.shape[1:].numel()
         out1, out2 = factorization(out_dim, rank)
         in1, in2 = factorization(in_dim, rank)
-        mat1 = torch.empty(out1, in1, device=weight.device, dtype=weight.dtype)
-        mat2 = torch.empty(out2, in2, device=weight.device, dtype=weight.dtype)
+        mat1 = torch.empty(out1, in1, device=weight.device, dtype=torch.float32)
+        mat2 = torch.empty(out2, in2, device=weight.device, dtype=torch.float32)
         torch.nn.init.kaiming_uniform_(mat2, a=5**0.5)
         torch.nn.init.constant_(mat1, 0.0)
         return LokrDiff(
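
The hunk above allocates the LoKr factors in float32 instead of the base weight's dtype. A plausible motivation, sketched under the assumption of a recent PyTorch build: in-place random initializers such as uniform_ (which kaiming_uniform_ calls) are generally not implemented for float8 tensors, so creating the factors in an FP8 checkpoint's dtype makes the init itself fail. The shapes below are illustrative only.

import torch

# Stand-in for an FP8-quantized base weight (hypothetical shape).
weight = torch.zeros(64, 64, dtype=torch.float8_e4m3fn)

bad = torch.empty(8, 8, device=weight.device, dtype=weight.dtype)
try:
    # kaiming_uniform_ falls back to uniform_, which is typically not
    # implemented for float8 dtypes, so this raises on current builds.
    torch.nn.init.kaiming_uniform_(bad, a=5**0.5)
except RuntimeError as err:
    print("FP8 init failed:", err)

# The patched code sidesteps this by always initializing in float32;
# the factors can be cast to the compute dtype later if needed.
good = torch.empty(8, 8, device=weight.device, dtype=torch.float32)
torch.nn.init.kaiming_uniform_(good, a=5**0.5)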