Improve the power throttling script
This commit is contained in:
@@ -4,6 +4,8 @@ After=sysinit.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
# Set the power limit in watts via environment variable (default: 120W)
|
||||
Environment="POWER_LIMIT_WATTS=120"
|
||||
ExecStart=/usr/local/bin/throttle_instinct.sh
|
||||
RemainAfterExit=yes
|
||||
|
||||
|
||||
@@ -1,45 +1,75 @@
|
||||
#!/bin/bash
|
||||
|
||||
watt_limit=120
|
||||
# Check if running as root
|
||||
if [ "$EUID" -ne 0 ]; then
|
||||
echo "Error: This script must be run as root (sudo or with appropriate privileges)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
for i in {0..10}; do
|
||||
card="/sys/class/drm/card$i"
|
||||
# Skip non-existing, virtual or non-GPU devices
|
||||
if [ ! -d "$card" ] || [ ! -e "$card/device" ]; then
|
||||
# Allow power limit to be set via environment variable or use default
|
||||
watt_limit=${POWER_LIMIT_WATTS:-225}
|
||||
|
||||
# Iterate through hwmon devices instead of DRM cards
|
||||
# This only processes devices that have hwmon capabilities
|
||||
devices_found=0
|
||||
|
||||
for hwmon in /sys/class/hwmon/hwmon*; do
|
||||
# Skip if hwmon device doesn't exist or has no device link
|
||||
if [ ! -d "$hwmon" ] || [ ! -e "$hwmon/device" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# Check if power1_cap exists (this device supports power limiting)
|
||||
power_cap_file="$hwmon/power1_cap"
|
||||
if [ ! -e "$power_cap_file" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# Resolve the PCI device path
|
||||
device_path=$(readlink -f "$card/device")
|
||||
device_path=$(readlink -f "$hwmon/device")
|
||||
|
||||
# Read vendor and device IDs from sysfs
|
||||
vendor_id=$(cat "$device_path/vendor")
|
||||
device_id=$(cat "$device_path/device")
|
||||
if [ ! -e "$device_path/vendor" ] || [ ! -e "$device_path/device" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
if [ "$vendor_id" == "0x1002" ] && [ "$device_id" == "0x66a0" ]; then
|
||||
echo "Vega detected, limiting to $watt_limit W."
|
||||
vendor_id=$(cat "$device_path/vendor" 2>/dev/null)
|
||||
device_id=$(cat "$device_path/device" 2>/dev/null)
|
||||
|
||||
# TODO check if the hwmon / power indices are stable with multiple cards or
|
||||
# make the script robust to always set the correct power cap
|
||||
found=0
|
||||
for hwmon in /sys/class/hwmon/hwmon*; do
|
||||
if [ -e "$hwmon/device" ]; then
|
||||
hwmon_dev=$(readlink -f "$hwmon/device")
|
||||
if [ "$hwmon_dev" = "$device_path" ]; then
|
||||
power_cap_file="$hwmon/power1_cap"
|
||||
if [ -e "$power_cap_file" ]; then
|
||||
echo ${watt_limit}000000 > $power_cap_file
|
||||
found=1
|
||||
break
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
# Check if this is a target GPU
|
||||
# AMD Vega 20 / MI50: 0x1002:0x66a0 or 0x1002:0x66a1
|
||||
# Test device: 0x1234:0x1111
|
||||
if { [ "$vendor_id" == "0x1002" ] &&
|
||||
{ [ "$device_id" == "0x66a0" ] || [ "$device_id" == "0x66a1" ]; }; }; then
|
||||
|
||||
if [ $found -eq 0 ]; then
|
||||
echo "Warning: Could not find power1_cap for this GPU."
|
||||
fi
|
||||
fi
|
||||
# Read GPU and driver information
|
||||
# Try to get the product name first, fallback to hwmon name
|
||||
gpu_name=$(cat "$device_path/product_name" 2>/dev/null)
|
||||
if [ -z "$gpu_name" ]; then
|
||||
gpu_name=$(cat "$hwmon/name" 2>/dev/null || echo "unknown")
|
||||
fi
|
||||
driver_link=$(readlink "$device_path/driver" 2>/dev/null)
|
||||
driver_name=$(basename "$driver_link" 2>/dev/null || echo "unknown")
|
||||
|
||||
echo
|
||||
printf "Target GPU detected at %s (%s)\n" "$device_path" "$hwmon"
|
||||
printf "\tGPU: %s | Driver: %s\n" "$gpu_name" "$driver_name"
|
||||
printf "\tSetting power limit to %d W\n" "$watt_limit"
|
||||
|
||||
# Set power limit (convert watts to microwatts)
|
||||
if echo "${watt_limit}000000" > "$power_cap_file" 2>/dev/null; then
|
||||
printf "\tSuccessfully set power limit\n"
|
||||
devices_found=$((devices_found + 1))
|
||||
else
|
||||
printf "\tError: Failed to write to %s\n" "$power_cap_file" >&2
|
||||
fi
|
||||
|
||||
echo
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $devices_found -eq 0 ]; then
|
||||
echo "Warning: No target GPUs with power limiting capability found" >&2
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Successfully configured power limits for $devices_found device(s)"
|
||||
|
||||
Reference in New Issue
Block a user