Use PyTorch attention by default on NVIDIA when xformers isn't present.
Add a new argument --use-quad-cross-attention.
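On an NVIDIA GPU running PyTorch 2.x, the attention path this commit enables when xformers is missing and no other cross-attention flag was passed is PyTorch's built-in scaled dot-product attention. A minimal sketch of the call this refers to, not part of the commit itself (tensor shapes are illustrative):

import torch
import torch.nn.functional as F

def sdp_attention(q, k, v):
    # q, k, v: (batch, heads, tokens, dim_per_head)
    # Dispatches to the flash / memory-efficient / math kernels that the
    # diff below toggles via torch.backends.cuda.enable_*_sdp(True).
    return F.scaled_dot_product_attention(q, k, v)

q = k = v = torch.randn(1, 8, 77, 64)
out = sdp_attention(q, k, v)  # same shape as q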
@@ -139,7 +139,23 @@ else:
     except:
         XFORMERS_IS_AVAILABLE = False
 
+def is_nvidia():
+    global cpu_state
+    if cpu_state == CPUState.GPU:
+        if torch.version.cuda:
+            return True
+
 ENABLE_PYTORCH_ATTENTION = args.use_pytorch_cross_attention
+
+if ENABLE_PYTORCH_ATTENTION == False and XFORMERS_IS_AVAILABLE == False and args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
+    try:
+        if is_nvidia():
+            torch_version = torch.version.__version__
+            if int(torch_version[0]) >= 2:
+                ENABLE_PYTORCH_ATTENTION = True
+    except:
+        pass
+
 if ENABLE_PYTORCH_ATTENTION:
     torch.backends.cuda.enable_math_sdp(True)
     torch.backends.cuda.enable_flash_sdp(True)
@@ -347,7 +363,7 @@ def pytorch_attention_flash_attention():
     global ENABLE_PYTORCH_ATTENTION
     if ENABLE_PYTORCH_ATTENTION:
         #TODO: more reliable way of checking for flash attention?
-        if torch.version.cuda: #pytorch flash attention only works on Nvidia
+        if is_nvidia(): #pytorch flash attention only works on Nvidia
             return True
     return False
 
@@ -438,7 +454,7 @@ def soft_empty_cache():
     elif xpu_available:
         torch.xpu.empty_cache()
     elif torch.cuda.is_available():
-        if torch.version.cuda: #This seems to make things worse on ROCm so I only do it for cuda
+        if is_nvidia(): #This seems to make things worse on ROCm so I only do it for cuda
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
 
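The default-selection logic above only takes effect when none of the attention flags (args.use_pytorch_cross_attention, args.use_split_cross_attention, and the new args.use_quad_cross_attention) were passed. A sketch of how such a flag could be declared with argparse; the project's actual argument parser lives elsewhere in the repository, and grouping the flags as mutually exclusive is an assumption made for illustration:

import argparse

parser = argparse.ArgumentParser()
# Assumed grouping: only one attention implementation is forced at a time.
attn_group = parser.add_mutually_exclusive_group()
attn_group.add_argument("--use-split-cross-attention", action="store_true",
                        help="Force the split cross-attention optimization.")
attn_group.add_argument("--use-quad-cross-attention", action="store_true",
                        help="Force the sub-quadratic cross-attention optimization.")
attn_group.add_argument("--use-pytorch-cross-attention", action="store_true",
                        help="Force PyTorch scaled dot-product attention.")

args = parser.parse_args(["--use-quad-cross-attention"])
assert args.use_quad_cross_attention  # dashes become underscores on the namespace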