I found a case where this is needed (#10875)

2025-11-25 00:23:19 -08:00
parent acfaa5c4a1
commit 015a0599d0
1 changed files with 2 additions and 2 deletions
@@ -405,8 +405,8 @@ class TensorCoreFP8Layout(QuantizedLayout):

        tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
        # TODO: uncomment this if it's actually needed because the clamp has a small performance penalty
-        # lp_amax = torch.finfo(dtype).max
-        # torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
+        lp_amax = torch.finfo(dtype).max
+        torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
        qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)

        layout_params = {