Improve the power throttling script
This commit is contained in:
@@ -4,6 +4,8 @@ After=sysinit.target
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
|
# Set the power limit in watts via environment variable (120 W here; the script falls back to 225 W if unset)
|
||||||
|
Environment="POWER_LIMIT_WATTS=120"
|
||||||
ExecStart=/usr/local/bin/throttle_instinct.sh
|
ExecStart=/usr/local/bin/throttle_instinct.sh
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
|
|
||||||
|
|||||||
@@ -1,45 +1,75 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
watt_limit=120
|
# Check if running as root
|
||||||
|
if [ "$EUID" -ne 0 ]; then
|
||||||
|
echo "Error: This script must be run as root (sudo or with appropriate privileges)" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
for i in {0..10}; do
|
# Allow power limit to be set via environment variable or use default
|
||||||
card="/sys/class/drm/card$i"
|
watt_limit=${POWER_LIMIT_WATTS:-225}
|
||||||
# Skip non-existing, virtual or non-GPU devices
|
|
||||||
if [ ! -d "$card" ] || [ ! -e "$card/device" ]; then
|
# Iterate through hwmon devices instead of DRM cards
|
||||||
|
# This only processes devices that have hwmon capabilities
|
||||||
|
devices_found=0
|
||||||
|
|
||||||
|
for hwmon in /sys/class/hwmon/hwmon*; do
|
||||||
|
# Skip if hwmon device doesn't exist or has no device link
|
||||||
|
if [ ! -d "$hwmon" ] || [ ! -e "$hwmon/device" ]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if power1_cap exists (this device supports power limiting)
|
||||||
|
power_cap_file="$hwmon/power1_cap"
|
||||||
|
if [ ! -e "$power_cap_file" ]; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Resolve the PCI device path
|
# Resolve the PCI device path
|
||||||
device_path=$(readlink -f "$card/device")
|
device_path=$(readlink -f "$hwmon/device")
|
||||||
|
|
||||||
# Read vendor and device IDs from sysfs
|
# Read vendor and device IDs from sysfs
|
||||||
vendor_id=$(cat "$device_path/vendor")
|
if [ ! -e "$device_path/vendor" ] || [ ! -e "$device_path/device" ]; then
|
||||||
device_id=$(cat "$device_path/device")
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
if [ "$vendor_id" == "0x1002" ] && [ "$device_id" == "0x66a0" ]; then
|
vendor_id=$(cat "$device_path/vendor" 2>/dev/null)
|
||||||
echo "Vega detected, limiting to $watt_limit W."
|
device_id=$(cat "$device_path/device" 2>/dev/null)
|
||||||
|
|
||||||
# TODO check if the hwmon / power indices are stable with multiple cards or
|
# Check if this is a target GPU
|
||||||
# make the script robust to always set the correct power cap
|
# AMD Vega 20 / MI50: 0x1002:0x66a0 or 0x1002:0x66a1
|
||||||
found=0
|
# Test device: 0x1234:0x1111 (NOTE: not matched by the condition below)
|
||||||
for hwmon in /sys/class/hwmon/hwmon*; do
|
if { [ "$vendor_id" == "0x1002" ] &&
|
||||||
if [ -e "$hwmon/device" ]; then
|
{ [ "$device_id" == "0x66a0" ] || [ "$device_id" == "0x66a1" ]; }; }; then
|
||||||
hwmon_dev=$(readlink -f "$hwmon/device")
|
|
||||||
if [ "$hwmon_dev" = "$device_path" ]; then
|
|
||||||
power_cap_file="$hwmon/power1_cap"
|
|
||||||
if [ -e "$power_cap_file" ]; then
|
|
||||||
echo ${watt_limit}000000 > $power_cap_file
|
|
||||||
found=1
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ $found -eq 0 ]; then
|
# Read GPU and driver information
|
||||||
echo "Warning: Could not find power1_cap for this GPU."
|
# Try to get the product name first, fallback to hwmon name
|
||||||
fi
|
gpu_name=$(cat "$device_path/product_name" 2>/dev/null)
|
||||||
fi
|
if [ -z "$gpu_name" ]; then
|
||||||
|
gpu_name=$(cat "$hwmon/name" 2>/dev/null || echo "unknown")
|
||||||
|
fi
|
||||||
|
driver_link=$(readlink "$device_path/driver" 2>/dev/null)
|
||||||
|
driver_name=$(basename "$driver_link" 2>/dev/null || echo "unknown")
|
||||||
|
|
||||||
echo
|
printf "Target GPU detected at %s (%s)\n" "$device_path" "$hwmon"
|
||||||
|
printf "\tGPU: %s | Driver: %s\n" "$gpu_name" "$driver_name"
|
||||||
|
printf "\tSetting power limit to %d W\n" "$watt_limit"
|
||||||
|
|
||||||
|
# Set power limit (convert watts to microwatts)
|
||||||
|
if echo "${watt_limit}000000" > "$power_cap_file" 2>/dev/null; then
|
||||||
|
printf "\tSuccessfully set power limit\n"
|
||||||
|
devices_found=$((devices_found + 1))
|
||||||
|
else
|
||||||
|
printf "\tError: Failed to write to %s\n" "$power_cap_file" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
if [ $devices_found -eq 0 ]; then
|
||||||
|
echo "Warning: No target GPUs with power limiting capability found" >&2
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Successfully configured power limits for $devices_found device(s)"
|
||||||
|
|||||||
Reference in New Issue
Block a user