From 24c6fa629d85584f088b6dd51b6863dec6e0fa8b Mon Sep 17 00:00:00 2001
From: Moritz Martinius
Date: Sun, 2 Nov 2025 15:23:08 +0000
Subject: [PATCH] Improve the power throttling script

---
 Readme.md => README.md                        |   0
 throttle_powerlimit/{Readme.md => README.md}  |   0
 throttle_powerlimit/throttle_instinct.service |   2 +
 throttle_powerlimit/throttle_instinct.sh      | 119 ++++++++++++++-----
 4 files changed, 90 insertions(+), 31 deletions(-)
 rename Readme.md => README.md (100%)
 rename throttle_powerlimit/{Readme.md => README.md} (100%)

diff --git a/Readme.md b/README.md
similarity index 100%
rename from Readme.md
rename to README.md
diff --git a/throttle_powerlimit/Readme.md b/throttle_powerlimit/README.md
similarity index 100%
rename from throttle_powerlimit/Readme.md
rename to throttle_powerlimit/README.md
diff --git a/throttle_powerlimit/throttle_instinct.service b/throttle_powerlimit/throttle_instinct.service
index cc94479..ba7a51e 100644
--- a/throttle_powerlimit/throttle_instinct.service
+++ b/throttle_powerlimit/throttle_instinct.service
@@ -4,6 +4,8 @@ After=sysinit.target
 
 [Service]
 Type=oneshot
+# Set the power limit in watts via environment variable (default: 120W)
+Environment="POWER_LIMIT_WATTS=120"
 ExecStart=/usr/local/bin/throttle_instinct.sh
 RemainAfterExit=yes
 
diff --git a/throttle_powerlimit/throttle_instinct.sh b/throttle_powerlimit/throttle_instinct.sh
index 9fbb728..0f8a4cc 100755
--- a/throttle_powerlimit/throttle_instinct.sh
+++ b/throttle_powerlimit/throttle_instinct.sh
@@ -1,45 +1,102 @@
 #!/bin/bash
 
-watt_limit=120
+# Caps the power limit of AMD Vega 20 / MI50 GPUs through the hwmon sysfs
+# interface.
+#
+# Environment:
+#   POWER_LIMIT_WATTS  power cap in watts (positive integer, default: 120)
+#
+# Exit status: 0 if every matching GPU was capped or none was found;
+# 1 if not run as root, the limit is invalid, or a write failed.
+
+# Check if running as root
+if [ "$EUID" -ne 0 ]; then
+  echo "Error: This script must be run as root (sudo or with appropriate privileges)" >&2
+  exit 1
+fi
 
-for i in {0..10}; do
-  card="/sys/class/drm/card$i"
-  # Skip non-existing, virtual or non-GPU devices
-  if [ ! -d "$card" ] || [ ! -e "$card/device" ]; then
+# Allow power limit to be set via environment variable. The default stays at
+# the 120 W this script enforced before the limit became configurable.
+watt_limit=${POWER_LIMIT_WATTS:-120}
+
+# Reject anything that is not a positive integer before it reaches sysfs
+if ! [[ "$watt_limit" =~ ^[1-9][0-9]*$ ]]; then
+  echo "Error: POWER_LIMIT_WATTS must be a positive integer, got '$watt_limit'" >&2
+  exit 1
+fi
+
+# Iterate through hwmon devices instead of DRM cards
+# This only processes devices that have hwmon capabilities
+devices_found=0
+devices_failed=0
+
+for hwmon in /sys/class/hwmon/hwmon*; do
+  # Skip if hwmon device doesn't exist or has no device link
+  if [ ! -d "$hwmon" ] || [ ! -e "$hwmon/device" ]; then
+    continue
+  fi
+
+  # Check if power1_cap exists (this device supports power limiting)
+  power_cap_file="$hwmon/power1_cap"
+  if [ ! -e "$power_cap_file" ]; then
     continue
   fi
 
   # Resolve the PCI device path
-  device_path=$(readlink -f "$card/device")
+  device_path=$(readlink -f "$hwmon/device")
 
   # Read vendor and device IDs from sysfs
-  vendor_id=$(cat "$device_path/vendor")
-  device_id=$(cat "$device_path/device")
+  if [ ! -e "$device_path/vendor" ] || [ ! -e "$device_path/device" ]; then
+    continue
+  fi
 
-  if [ "$vendor_id" == "0x1002" ] && [ "$device_id" == "0x66a0" ]; then
-    echo "Vega detected, limiting to $watt_limit W."
+  vendor_id=$(cat "$device_path/vendor" 2>/dev/null)
+  device_id=$(cat "$device_path/device" 2>/dev/null)
 
-    # TODO check if the hwmon / power indices are stable with multiple cards or
-    # make the script robust to always set the correct power cap
-    found=0
-    for hwmon in /sys/class/hwmon/hwmon*; do
-      if [ -e "$hwmon/device" ]; then
-        hwmon_dev=$(readlink -f "$hwmon/device")
-        if [ "$hwmon_dev" = "$device_path" ]; then
-          power_cap_file="$hwmon/power1_cap"
-          if [ -e "$power_cap_file" ]; then
-            echo ${watt_limit}000000 > $power_cap_file
-            found=1
-            break
-          fi
-        fi
-      fi
-    done
+  # Check if this is a target GPU
+  # AMD Vega 20 / MI50: 0x1002:0x66a0 or 0x1002:0x66a1
+  if { [ "$vendor_id" == "0x1002" ] &&
+    { [ "$device_id" == "0x66a0" ] || [ "$device_id" == "0x66a1" ]; }; }; then
 
-    if [ $found -eq 0 ]; then
-      echo "Warning: Could not find power1_cap for this GPU."
-    fi
-  fi
+    # Read GPU and driver information
+    # Try to get the product name first, fallback to hwmon name
+    gpu_name=$(cat "$device_path/product_name" 2>/dev/null)
+    if [ -z "$gpu_name" ]; then
+      gpu_name=$(cat "$hwmon/name" 2>/dev/null || echo "unknown")
+    fi
+    # Strip the directory with parameter expansion: basename exits 0 even for
+    # an empty argument, so an "|| echo unknown" fallback would never fire
+    driver_link=$(readlink "$device_path/driver" 2>/dev/null)
+    driver_name=${driver_link##*/}
+    driver_name=${driver_name:-unknown}
 
-  echo
+    printf "Target GPU detected at %s (%s)\n" "$device_path" "$hwmon"
+    printf "\tGPU: %s | Driver: %s\n" "$gpu_name" "$driver_name"
+    printf "\tSetting power limit to %d W\n" "$watt_limit"
+
+    # Set power limit (convert watts to microwatts)
+    if echo "${watt_limit}000000" > "$power_cap_file" 2>/dev/null; then
+      printf "\tSuccessfully set power limit\n"
+      devices_found=$((devices_found + 1))
+    else
+      printf "\tError: Failed to write to %s\n" "$power_cap_file" >&2
+      devices_failed=$((devices_failed + 1))
+    fi
+
+    echo
+  fi
 done
+
+# A failed write must not look like "no GPU present": exit non-zero so the
+# systemd unit is marked failed
+if [ "$devices_failed" -gt 0 ]; then
+  echo "Error: Failed to set the power limit on $devices_failed device(s)" >&2
+  exit 1
+fi
+
+if [ "$devices_found" -eq 0 ]; then
+  echo "Warning: No target GPUs with power limiting capability found" >&2
+  exit 0
+fi
+
+echo "Successfully configured power limits for $devices_found device(s)"