Improve the power throttling script

This commit is contained in:
2025-11-02 15:23:08 +00:00
parent 0dba2464b2
commit 24c6fa629d
4 changed files with 63 additions and 31 deletions

View File

@@ -4,6 +4,8 @@ After=sysinit.target
[Service]
Type=oneshot
# Set the power limit in watts via environment variable (120 W here; the script falls back to 225 W if unset)
Environment="POWER_LIMIT_WATTS=120"
ExecStart=/usr/local/bin/throttle_instinct.sh
RemainAfterExit=yes

View File

@@ -1,45 +1,75 @@
#!/bin/bash
# Purpose: cap the power limit of matching GPUs by writing a value to the
# hwmon "power1_cap" file of the corresponding device under /sys/class/hwmon.
#
# NOTE(review): this listing looks like a diff rendering in which removed and
# added lines are interleaved without +/- markers (e.g. two `watt_limit=`
# assignments, two `device_path=` assignments, and a DRM-card loop next to an
# hwmon loop). It is presumably not runnable as shown -- confirm against the
# actual file before relying on statement order.

# Fixed limit in watts; a later line reassigns watt_limit from the environment.
watt_limit=120
# Check if running as root
# ($EUID is non-zero for any non-root user.)
if [ "$EUID" -ne 0 ]; then
echo "Error: This script must be run as root (sudo or with appropriate privileges)" >&2
exit 1
fi
# Probe /sys/class/drm/card0 .. card10.
for i in {0..10}; do
card="/sys/class/drm/card$i"
# Skip non-existing, virtual or non-GPU devices
if [ ! -d "$card" ] || [ ! -e "$card/device" ]; then
# Allow power limit to be set via environment variable or use default
# (`:-` falls back to 225 when POWER_LIMIT_WATTS is unset or empty.)
watt_limit=${POWER_LIMIT_WATTS:-225}
# Iterate through hwmon devices instead of DRM cards
# This only processes devices that have hwmon capabilities
# Counts devices whose power cap was written successfully (see the increment
# after the write further down).
devices_found=0
for hwmon in /sys/class/hwmon/hwmon*; do
# Skip if hwmon device doesn't exist or has no device link
if [ ! -d "$hwmon" ] || [ ! -e "$hwmon/device" ]; then
continue
fi
# Check if power1_cap exists (this device supports power limiting)
power_cap_file="$hwmon/power1_cap"
if [ ! -e "$power_cap_file" ]; then
continue
fi
# Resolve the PCI device path
# (Two variants follow: one resolving via the DRM card, one via the hwmon
# entry; both canonicalize the "device" symlink with readlink -f.)
device_path=$(readlink -f "$card/device")
device_path=$(readlink -f "$hwmon/device")
# Read vendor and device IDs from sysfs
vendor_id=$(cat "$device_path/vendor")
device_id=$(cat "$device_path/device")
# Skip devices that expose no vendor/device ID files.
if [ ! -e "$device_path/vendor" ] || [ ! -e "$device_path/device" ]; then
continue
fi
# Variant matching only vendor 0x1002 with device 0x66a0.
if [ "$vendor_id" == "0x1002" ] && [ "$device_id" == "0x66a0" ]; then
echo "Vega detected, limiting to $watt_limit W."
# Same reads as above, but with cat's error output discarded.
vendor_id=$(cat "$device_path/vendor" 2>/dev/null)
device_id=$(cat "$device_path/device" 2>/dev/null)
# TODO check if the hwmon / power indices are stable with multiple cards or
# make the script robust to always set the correct power cap
# Search all hwmon entries for the one whose resolved device path equals
# $device_path, then write the cap to its power1_cap and stop at the first hit.
found=0
for hwmon in /sys/class/hwmon/hwmon*; do
if [ -e "$hwmon/device" ]; then
hwmon_dev=$(readlink -f "$hwmon/device")
if [ "$hwmon_dev" = "$device_path" ]; then
power_cap_file="$hwmon/power1_cap"
if [ -e "$power_cap_file" ]; then
# Appends six zeros to the watt value (watts -> microwatts, per the comment
# on the later write). NOTE(review): the redirect target is unquoted here.
echo ${watt_limit}000000 > $power_cap_file
found=1
break
fi
fi
fi
done
# Check if this is a target GPU
# AMD Vega 20 / MI50: 0x1002:0x66a0 or 0x1002:0x66a1
# Test device: 0x1234:0x1111
# NOTE(review): the condition below only matches 0x1002 with 0x66a0/0x66a1;
# the test device ID listed above is not part of it.
if { [ "$vendor_id" == "0x1002" ] &&
{ [ "$device_id" == "0x66a0" ] || [ "$device_id" == "0x66a1" ]; }; }; then
if [ $found -eq 0 ]; then
echo "Warning: Could not find power1_cap for this GPU."
fi
fi
# Read GPU and driver information
# Try to get the product name first, fallback to hwmon name
gpu_name=$(cat "$device_path/product_name" 2>/dev/null)
if [ -z "$gpu_name" ]; then
gpu_name=$(cat "$hwmon/name" 2>/dev/null || echo "unknown")
fi
# Driver name is the last path component of the device's "driver" symlink target.
driver_link=$(readlink "$device_path/driver" 2>/dev/null)
driver_name=$(basename "$driver_link" 2>/dev/null || echo "unknown")
echo
printf "Target GPU detected at %s (%s)\n" "$device_path" "$hwmon"
printf "\tGPU: %s | Driver: %s\n" "$gpu_name" "$driver_name"
# NOTE(review): %d expects an integer; watt_limit comes from the environment
# and is not validated anywhere in the visible code.
printf "\tSetting power limit to %d W\n" "$watt_limit"
# Set power limit (convert watts to microwatts)
# The counter is only incremented when the write itself succeeds.
if echo "${watt_limit}000000" > "$power_cap_file" 2>/dev/null; then
printf "\tSuccessfully set power limit\n"
devices_found=$((devices_found + 1))
else
printf "\tError: Failed to write to %s\n" "$power_cap_file" >&2
fi
echo
fi
done
# Finding no device is reported as a warning on stderr, but the script still
# exits with status 0.
if [ $devices_found -eq 0 ]; then
echo "Warning: No target GPUs with power limiting capability found" >&2
exit 0
fi
echo "Successfully configured power limits for $devices_found device(s)"