Add --use-flash-attention flag. (#7223)
* Add --use-flash-attention flag. This is useful on AMD systems, as FlashAttention (FA) builds are still 10% faster than PyTorch cross-attention.
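The diff below only adds a helper that reads the new flag; the flag itself would also have to be registered with the project's command-line parser, which is not shown in this hunk. A minimal sketch, assuming an argparse-style parser that stores the value as args.use_flash_attention:

    import argparse

    # Sketch of the flag registration (assumption: argparse-style parser, not the repo's actual cli_args code).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--use-flash-attention",
        action="store_true",
        help="Use FlashAttention for cross-attention (useful on AMD builds).",
    )

    # Example invocation: main.py --use-flash-attention
    args = parser.parse_args(["--use-flash-attention"])
    assert args.use_flash_attention is True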
@@ -930,6 +930,9 @@ def cast_to_device(tensor, device, dtype, copy=False):
 def sage_attention_enabled():
     return args.use_sage_attention
 
+def flash_attention_enabled():
+    return args.use_flash_attention
+
 def xformers_enabled():
     global directml_enabled
     global cpu_state
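For context, a helper like flash_attention_enabled() would typically gate which attention backend gets selected at runtime. That dispatch code is not part of this hunk, so the sketch below is an assumption: it presumes it sits in the same module as the helpers above, and the backend names are illustrative rather than the repo's actual identifiers.

    def pick_attention_backend():
        # Hypothetical dispatch, checking the per-backend flag helpers in priority order.
        if flash_attention_enabled():
            return "flash"     # FlashAttention path; the commit notes ~10% gain on AMD builds
        if sage_attention_enabled():
            return "sage"
        if xformers_enabled():
            return "xformers"
        return "pytorch"       # default PyTorch cross-attention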