Turn off cuda malloc by default when --fast autotune is turned on. (#10393)

2025-10-18 19:35:46 -07:00
parent 9da397ea2f
commit 5b80addafd
3 changed files with 7 additions and 6 deletions
@@ -371,6 +371,9 @@ try:
 except:
    pass

+if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
+    torch.backends.cudnn.benchmark = True
+
 try:
    if torch_version_numeric >= (2, 5):
        torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
@@ -67,9 +67,6 @@ except:

 cast_to = comfy.model_management.cast_to #TODO: remove once no more references

-if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
-    torch.backends.cudnn.benchmark = True
-
 def cast_to_input(weight, input, non_blocking=False, copy=True):
    return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)