ops: Fix vanilla-fp8 loaded lora quality (#12390)
This was missing the stochastic rounding required for the fp8 downcast to be consistent with model_patcher.patch_weight_to_device. It was missed in testing because I spent too much time with quantized tensors and overlooked the simpler ones.
This commit is contained in:
+2
-2
@@ -169,8 +169,8 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
|
|||||||
if orig.dtype == dtype and len(fns) == 0:
|
if orig.dtype == dtype and len(fns) == 0:
|
||||||
#The layer actually wants our freshly saved QT
|
#The layer actually wants our freshly saved QT
|
||||||
x = y
|
x = y
|
||||||
else:
|
elif update_weight:
|
||||||
y = x
|
y = comfy.float.stochastic_rounding(x, orig.dtype, seed = comfy.utils.string_to_seed(s.seed_key))
|
||||||
if update_weight:
|
if update_weight:
|
||||||
orig.copy_(y)
|
orig.copy_(y)
|
||||||
for f in fns:
|
for f in fns:
|
||||||
|
|||||||
Reference in New Issue
Block a user