I found a case where this is needed (#10875)
@@ -405,8 +405,8 @@ class TensorCoreFP8Layout(QuantizedLayout):
         tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
         # TODO: uncomment this if it's actually needed because the clamp has a small performance penality'
-        # lp_amax = torch.finfo(dtype).max
-        # torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
+        lp_amax = torch.finfo(dtype).max
+        torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)

         layout_params = {
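For context, the lines being uncommented clamp the scaled tensor to the finite range of the target fp8 dtype before the cast. Below is a minimal sketch of that step as a standalone helper; the `tensor`, `scale`, and `dtype` names come from the hunk, while the function wrapper and the `torch.float8_e4m3fn` default are illustrative assumptions, not the actual method signature.

import torch

def quantize_fp8(tensor: torch.Tensor, scale: torch.Tensor,
                 dtype: torch.dtype = torch.float8_e4m3fn) -> torch.Tensor:
    # Scale the input into the target range.
    tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
    # Clamp to the largest finite magnitude representable in the fp8 dtype,
    # the step this commit enables.
    lp_amax = torch.finfo(dtype).max
    torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
    # Cast to fp8, forcing a contiguous layout.
    return tensor_scaled.to(dtype, memory_format=torch.contiguous_format)

Without the clamp, magnitudes above `torch.finfo(dtype).max` may not saturate during the cast (float8_e4m3fn has no inf and can yield NaN), which is the kind of failure the restored lines guard against, at the small performance cost the TODO mentions.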